*** a/doc/src/sgml/textsearch.sgml
--- b/doc/src/sgml/textsearch.sgml
***************
*** 2615,2632 **** SELECT plainto_tsquery('supernova star');
! To create an Ispell> dictionary, use the built-in
! ispell template and specify several parameters:
!
! CREATE TEXT SEARCH DICTIONARY english_ispell (
TEMPLATE = ispell,
! DictFile = english,
! AffFile = english,
! StopWords = english
! );
Here, DictFile>, AffFile>, and StopWords>
--- 2615,2655 ----
! To create an Ispell> dictionary perform these steps:
!
!
!
! download dictionary configuration files. OpenOffice>
! extension files have the .oxt> extension. It is necessary
! to extract .aff> and .dic> files, change extensions
! to .affix> and .dict>. For some dictionary
! files it is also necessary to convert characters to the UTF-8 encoding
! with commands such as these (for example, for a Norwegian language dictionary):
! iconv -f ISO_8859-1 -t UTF-8 -o nn_no.affix nn_NO.aff
! iconv -f ISO_8859-1 -t UTF-8 -o nn_no.dict nn_NO.dic
!
!
!
!
!
! copy files to the $SHAREDIR/tsearch_data> directory
!
!
!
!
! load files into PostgreSQL with the following command:
!
! CREATE TEXT SEARCH DICTIONARY english_hunspell (
TEMPLATE = ispell,
! DictFile = en_us,
! AffFile = en_us,
! Stopwords = english);
+
+
+
Here, DictFile>, AffFile>, and StopWords>
***************
*** 2643,2648 **** CREATE TEXT SEARCH DICTIONARY english_ispell (
--- 2666,2720 ----
+ The .affix> file of Ispell> has the following structure:
+
+ prefixes
+ flag *A:
+ . > RE # As in enter > reenter
+ suffixes
+ flag T:
+ E > ST # As in late > latest
+ [^AEIOU]Y > -Y,IEST # As in dirty > dirtiest
+ [AEIOU]Y > EST # As in gray > grayest
+ [^EY] > EST # As in small > smallest
+
+
+
+ And the .dict> file has the following structure:
+
+ lapse/ADGRS
+ lard/DGRS
+ large/PRTY
+ lark/MRS
+
+
+
+
+ Format of the .dict> file is:
+
+ basic_form/affix_class_name
+
+
+
+
+ In the .affix> file every affix flag is described in the
+ following format:
+
+ condition > [-stripping_letters,] adding_affix
+
+
+
+
+ Here, condition has a format similar to the format of regular expressions.
+ It can use groupings [...]> and [^...]>.
+ For example, [AEIOU]Y> means that the last letter of the word
+ is "y"> and the penultimate letter is "a">,
+ "e">, "i">, "o"> or "u">.
+ [^EY]> means that the last letter is neither "e">
+ nor "y">.
+
+
+
Ispell dictionaries support splitting compound words;
a useful feature.
Notice that the affix file should specify a special flag using the
***************
*** 2663,2668 **** SELECT ts_lexize('norwegian_ispell', 'sjokoladefabrikk');
--- 2735,2796 ----
+
+ MySpell> is very similar to Hunspell>.
+ The .affix> file of Hunspell> has the following structure:
+
+ PFX A Y 1
+ PFX A 0 re .
+ SFX T N 4
+ SFX T 0 st e
+ SFX T y iest [^aeiou]y
+ SFX T 0 est [aeiou]y
+ SFX T 0 est [^ey]
+
+
+
+
+ The first line of an affix class is the header. Fields of an affix rule are listed after the header:
+
+
+
+
+ parameter name (PFX or SFX)
+
+
+
+
+ flag (name of the affix class)
+
+
+
+
+ stripping characters from beginning (at prefix) or end (at suffix) of the word
+
+
+
+
+ adding affix
+
+
+
+
+ condition that has a format similar to the format of regular expressions.
+
+
+
+
+
+ The .dict> file looks like the .dict> file of
+ Ispell>:
+
+ larder/M
+ lardy/RT
+ large/RSPMYT
+ largehearted
+
+
+
MySpell> does not support compound words.
*** a/src/backend/tsearch/Makefile
--- b/src/backend/tsearch/Makefile
***************
*** 13,20 **** include $(top_builddir)/src/Makefile.global
DICTDIR=tsearch_data
! DICTFILES=synonym_sample.syn thesaurus_sample.ths hunspell_sample.affix \
! ispell_sample.affix ispell_sample.dict
OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
dict_simple.o dict_synonym.o dict_thesaurus.o \
--- 13,22 ----
DICTDIR=tsearch_data
! DICTFILES=dicts/synonym_sample.syn dicts/thesaurus_sample.ths \
! dicts/hunspell_sample.affix dicts/ispell_sample.affix dicts/ispell_sample.dict \
! dicts/hunspell_sample_long.affix dicts/hunspell_sample_long.dict \
! dicts/hunspell_sample_num.affix dicts/hunspell_sample_num.dict
OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
dict_simple.o dict_synonym.o dict_thesaurus.o \
*** /dev/null
--- b/src/backend/tsearch/dicts/hunspell_sample.affix
***************
*** 0 ****
--- 1,24 ----
+ COMPOUNDFLAG Z
+ ONLYINCOMPOUND L
+
+ PFX B Y 1
+ PFX B 0 re .
+
+ PFX U N 1
+ PFX U 0 un .
+
+ SFX J Y 1
+ SFX J 0 INGS [^E]
+
+ SFX G Y 1
+ SFX G 0 ING [^E]
+
+ SFX S Y 1
+ SFX S 0 S [^SXZHY]
+
+ SFX A Y 1
+ SFX A Y IES [^AEIOU]Y
+
+ SFX \ N 1
+ SFX \ 0 Y/L [^Y]
+
*** /dev/null
--- b/src/backend/tsearch/dicts/hunspell_sample_long.affix
***************
*** 0 ****
--- 1,35 ----
+ FLAG long
+
+ AF 7
+ AF cZ #1
+ AF cL #2
+ AF sGsJpUsS #3
+ AF sSpB #4
+ AF cZsS #5
+ AF sScZs\ #6
+ AF sA #7
+
+ COMPOUNDFLAG cZ
+ ONLYINCOMPOUND cL
+
+ PFX pB Y 1
+ PFX pB 0 re .
+
+ PFX pU N 1
+ PFX pU 0 un .
+
+ SFX sJ Y 1
+ SFX sJ 0 INGS [^E]
+
+ SFX sG Y 1
+ SFX sG 0 ING [^E]
+
+ SFX sS Y 1
+ SFX sS 0 S [^SXZHY]
+
+ SFX sA Y 1
+ SFX sA Y IES [^AEIOU]Y
+
+ SFX s\ N 1
+ SFX s\ 0 Y/2 [^Y]
+
*** /dev/null
--- b/src/backend/tsearch/dicts/hunspell_sample_long.dict
***************
*** 0 ****
--- 1,8 ----
+ book/3
+ booking/4
+ footballklubber
+ foot/5
+ football/1
+ ball/6
+ klubber/1
+ sky/7
*** /dev/null
--- b/src/backend/tsearch/dicts/hunspell_sample_num.affix
***************
*** 0 ****
--- 1,26 ----
+ FLAG num
+
+ COMPOUNDFLAG 101
+ ONLYINCOMPOUND 102
+
+ PFX 201 Y 1
+ PFX 201 0 re .
+
+ PFX 202 N 1
+ PFX 202 0 un .
+
+ SFX 301 Y 1
+ SFX 301 0 INGS [^E]
+
+ SFX 302 Y 1
+ SFX 302 0 ING [^E]
+
+ SFX 303 Y 1
+ SFX 303 0 S [^SXZHY]
+
+ SFX 304 Y 1
+ SFX 304 Y IES [^AEIOU]Y
+
+ SFX 305 N 1
+ SFX 305 0 Y/102 [^Y]
+
*** /dev/null
--- b/src/backend/tsearch/dicts/hunspell_sample_num.dict
***************
*** 0 ****
--- 1,8 ----
+ book/302,301,202,303
+ booking/303,201
+ footballklubber
+ foot/101,303
+ football/101
+ ball/303,101,305
+ klubber/101
+ sky/304
*** /dev/null
--- b/src/backend/tsearch/dicts/ispell_sample.affix
***************
*** 0 ****
--- 1,26 ----
+ compoundwords controlled Z
+
+ prefixes
+
+ flag *B:
+ . > RE # As in enter > reenter
+
+ flag U:
+ . > UN # As in natural > unnatural
+
+ suffixes
+
+ flag *J:
+ [^E] > INGS # As in cross > crossings
+
+ flag *G:
+ [^E] > ING # As in cross > crossing
+
+ flag *S:
+ [^SXZHY] > S # As in bat > bats
+
+ flag *A:
+ [^AEIOU]Y > -Y,IES # As in imply > implies
+
+ flag ~\\:
+ [^Y] > Y #~ advarsel > advarsely-
*** /dev/null
--- b/src/backend/tsearch/dicts/ispell_sample.dict
***************
*** 0 ****
--- 1,8 ----
+ book/GJUS
+ booking/SB
+ footballklubber
+ foot/ZS
+ football/Z
+ ball/SZ\
+ klubber/Z
+ sky/A
*** /dev/null
--- b/src/backend/tsearch/dicts/synonym_sample.syn
***************
*** 0 ****
--- 1,5 ----
+ postgres pgsql
+ postgresql pgsql
+ postgre pgsql
+ gogle googl
+ indices index*
*** /dev/null
--- b/src/backend/tsearch/dicts/thesaurus_sample.ths
***************
*** 0 ****
--- 1,17 ----
+ #
+ # Thesaurus config file. Character ':' separates string from replacement, e.g.
+ # sample-words : substitute-words
+ #
+ # Any substitute-word can be marked by preceding '*' character,
+ # which means do not lexize this word
+ # Docs: http://www.sai.msu.su/~megera/oddmuse/index.cgi/Thesaurus_dictionary
+
+ one two three : *123
+ one two : *12
+ one : *1
+ two : *2
+
+ supernovae stars : *sn
+ supernovae : *sn
+ booking tickets : order invitation cards
+ booking ? tickets : order invitation Cards
*** a/src/backend/tsearch/hunspell_sample.affix
--- /dev/null
***************
*** 1,24 ****
- COMPOUNDFLAG Z
- ONLYINCOMPOUND L
-
- PFX B Y 1
- PFX B 0 re .
-
- PFX U N 1
- PFX U 0 un .
-
- SFX J Y 1
- SFX J 0 INGS [^E]
-
- SFX G Y 1
- SFX G 0 ING [^E]
-
- SFX S Y 1
- SFX S 0 S [^SXZHY]
-
- SFX A Y 1
- SFX A Y IES [^AEIOU]Y
-
- SFX \ N 1
- SFX \ 0 Y/L [^Y]
-
--- 0 ----
*** a/src/backend/tsearch/ispell_sample.affix
--- /dev/null
***************
*** 1,26 ****
- compoundwords controlled Z
-
- prefixes
-
- flag *B:
- . > RE # As in enter > reenter
-
- flag U:
- . > UN # As in natural > unnatural
-
- suffixes
-
- flag *J:
- [^E] > INGS # As in cross > crossings
-
- flag *G:
- [^E] > ING # As in cross > crossing
-
- flag *S:
- [^SXZHY] > S # As in bat > bats
-
- flag *A:
- [^AEIOU]Y > -Y,IES # As in imply > implies
-
- flag ~\\:
- [^Y] > Y #~ advarsel > advarsely-
--- 0 ----
*** a/src/backend/tsearch/ispell_sample.dict
--- /dev/null
***************
*** 1,8 ****
- book/GJUS
- booking/SB
- footballklubber
- foot/ZS
- football/Z
- ball/SZ\
- klubber/Z
- sky/A
--- 0 ----
*** a/src/backend/tsearch/spell.c
--- b/src/backend/tsearch/spell.c
***************
*** 5,10 ****
--- 5,56 ----
*
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
*
+ * Ispell dictionary
+ * --------------------------------
+ *
+ * Rules of dictionaries are defined in two files with .affix and .dict
+ * extensions. They are used by spell checker programs Ispell and Hunspell.
+ *
+ * An .affix file declares morphological rules to get a basic form of words.
+ * The format of an .affix file has different structure for Ispell and Hunspell
+ * dictionaries. The Hunspell format is more complicated. But when an .affix
+ * file is imported and compiled, it is stored in the same structure AffixNode.
+ *
+ * A .dict file stores a list of basic forms of words with references to
+ * affix rules. The format of a .dict file has the same structure for Ispell
+ * and Hunspell dictionaries.
+ *
+ * Compilation of a dictionary
+ * ---------------------------
+ *
+ * A compiled dictionary is stored in the IspellDict structure. Compilation of
+ * a dictionary is divided into several steps:
+ * - NIImportDictionary() - stores each word of a .dict file in the
+ * temporary Spell field.
+ * - NIImportAffixes() - stores affix rules of an .affix file in the
+ * Affix field (not temporary) if an .affix file has the Ispell format.
+ * -> NIImportOOAffixes() - stores affix rules if an .affix file has the
+ * Hunspell format. The AffixData field is initialized if AF parameter
+ * is defined.
+ * - NISortDictionary() - builds a prefix tree (Trie) from the words list
+ * and stores it in the Dictionary field. The words list is got from the
+ * Spell field. The AffixData field is initialized if AF parameter is not defined.
+ * - NISortAffixes():
+ * - builds a list of compound affixes from the affix list and stores it
+ * in the CompoundAffix.
+ * - builds prefix trees (Trie) from the affix list for prefixes and suffixes
+ * and stores them in Suffix and Prefix fields.
+ * The affix list is got from the Affix field.
+ *
+ * Memory management
+ * -----------------
+ *
+ * The IspellDict structure has the Spell field which is used only in compile
+ * time. The Spell field stores a words list. It can take a lot of memory.
+ * Therefore when a dictionary is compiled this field is cleared by NIFinishBuild().
+ *
+ * All resources which should be cleared by NIFinishBuild() are initialized using
+ * tmpalloc() and tmpalloc0().
*
* IDENTIFICATION
* src/backend/tsearch/spell.c
***************
*** 153,159 **** cmpspell(const void *s1, const void *s2)
static int
cmpspellaffix(const void *s1, const void *s2)
{
! return (strncmp((*(SPELL *const *) s1)->p.flag, (*(SPELL *const *) s2)->p.flag, MAXFLAGLEN));
}
static char *
--- 199,205 ----
static int
cmpspellaffix(const void *s1, const void *s2)
{
! return (strcmp((*(SPELL *const *) s1)->p.flag, (*(SPELL *const *) s2)->p.flag));
}
static char *
***************
*** 220,225 **** strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
--- 266,276 ----
return 0;
}
+ /*
+ * Compares affixes.
+ * First compares the type of an affix. Prefixes should go before suffixes.
+ * If types are equal then compares replaceable string.
+ */
static int
cmpaffix(const void *s1, const void *s2)
{
***************
*** 237,242 **** cmpaffix(const void *s1, const void *s2)
--- 288,426 ----
(const unsigned char *) a2->repl);
}
+ /*
+  * Decodes the first affix flag of a flag-set string into its integer form.
+  *
+  * Several flags can be stored in a single string. Flags can be represented by:
+  * - 1 character (FM_CHAR).
+  * - 2 characters (FM_LONG).
+  * - numbers from 1 to 65000 (FM_NUM).
+  *
+  * Depending on the flagMode an affix string can have the following format:
+  * - FM_CHAR: ABCD gives 4 flags: A, B, C and D
+  * - FM_LONG: ABCDE* gives 3 flags: AB, CD and E*
+  * - FM_NUM: 200,205,50 gives 3 flags: 200, 205 and 50
+  *
+  * Conf: current dictionary.
+  * sflag: string representation of a set of affix flags.
+  * sflagnext: if not NULL, receives a pointer to the start of the next affix
+  *		flag in sflag.
+  *
+  * Returns an integer representation of the affix flag.
+  * Reports ERROR (ERRCODE_CONFIG_FILE_ERROR) if the flag exceeds the limit
+  * (FLAGCHAR_MAXSIZE or FLAGNUM_MAXSIZE) that applies to the current flagMode.
+  */
+ static unsigned short
+ DecodeFlag(IspellDict *Conf, char *sflag, char **sflagnext)
+ {
+ 	long		s;
+ 	char	   *next;
+
+ 	switch (Conf->flagMode)
+ 	{
+ 		case FM_LONG:
+ 			/* Read bytes as unsigned so that high-bit bytes do not sign-extend */
+ 			if ((unsigned char) sflag[0] > FLAGCHAR_MAXSIZE ||
+ 				(unsigned char) sflag[1] > FLAGCHAR_MAXSIZE)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ 						 errmsg("invalid affix flag \"%s\"", sflag)));
+
+ 			s = ((unsigned char) sflag[0] << 8) | (unsigned char) sflag[1];
+ 			if (sflagnext)
+ 				/* Skip the bytes consumed, but never step past the NUL */
+ 				*sflagnext = sflag + ((sflag[1] != '\0') ? 2 : 1);
+ 			break;
+ 		case FM_NUM:
+ 			/* Range-check the full strtol() result before narrowing it */
+ 			s = strtol(sflag, &next, 10);
+ 			if (s < 0 || s > FLAGNUM_MAXSIZE)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ 						 errmsg("invalid affix flag \"%s\"", sflag)));
+
+ 			if (sflagnext)
+ 			{
+ 				/*
+ 				 * strtol() always sets next; advance just past the next comma.
+ 				 */
+ 				*sflagnext = next;
+ 				while (**sflagnext)
+ 				{
+ 					if (**sflagnext == ',')
+ 					{
+ 						/* Found start of the next flag */
+ 						*sflagnext += pg_mblen(*sflagnext);
+ 						break;
+ 					}
+ 					*sflagnext += pg_mblen(*sflagnext);
+ 				}
+ 			}
+ 			break;
+ 		default:
+ 			s = (unsigned char) *sflag;
+ 			if (s > FLAGCHAR_MAXSIZE)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+ 						 errmsg("invalid affix flag \"%s\"", sflag)));
+
+ 			if (sflagnext)
+ 				/* Go to start of the next flag */
+ 				*sflagnext = sflag + pg_mblen(sflag);
+ 	}
+
+ 	return (unsigned short) s;
+ }
+
+ /*
+  * Tests whether the affix set Conf->AffixData[affix] contains affixflag.
+  * Conf->AffixData[affix] holds the string representation of a set of affix
+  * flags; DecodeFlag() is used to walk through it one flag at a time.
+  * A flag that no .dict entry actually uses does not appear in the set.
+  *
+  * Conf: current dictionary.
+  * affix: index into the Conf->AffixData array.
+  * affixflag: integer representation of the affix flag.
+  *
+  * Returns true if affixflag is 0 or if the string Conf->AffixData[affix]
+  * contains affixflag, otherwise returns false.
+  */
+ static bool
+ IsAffixFlagInUse(IspellDict *Conf, int affix, unsigned short affixflag)
+ {
+ 	char	   *cursor;
+ 	char	   *following = NULL;
+
+ 	/* A zero flag is treated as present in every affix set */
+ 	if (affixflag == 0)
+ 		return true;
+
+ 	cursor = Conf->AffixData[affix];
+ 	while (*cursor != '\0')
+ 	{
+ 		/* Decode the flag at the cursor and test it against affixflag */
+ 		if (DecodeFlag(Conf, cursor, &following) == affixflag)
+ 			return true;
+
+ 		/* Nothing left to decode in this affix set */
+ 		if (following == NULL)
+ 			break;
+
+ 		cursor = following;
+ 	}
+
+ 	/* The whole set was scanned without a match */
+ 	return false;
+ }
+
+ /*
+ * Adds the new word into the temporary array Spell.
+ *
+ * Conf: current dictionary.
+ * word: new word.
+ * flag: set of affix flags. Integer representation of flag can be got by
+ * DecodeFlag().
+ */
static void
NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
{
***************
*** 255,268 **** NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
}
Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
strcpy(Conf->Spell[Conf->nspell]->word, word);
! strlcpy(Conf->Spell[Conf->nspell]->p.flag, flag, MAXFLAGLEN);
Conf->nspell++;
}
/*
! * import dictionary
*
! * Note caller must already have applied get_tsearch_config_filename
*/
void
NIImportDictionary(IspellDict *Conf, const char *filename)
--- 439,455 ----
}
Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
strcpy(Conf->Spell[Conf->nspell]->word, word);
! Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0') ? cpstrdup(Conf, flag) : VoidString;
Conf->nspell++;
}
/*
! * Imports dictionary into the temporary array Spell.
*
! * Note caller must already have applied get_tsearch_config_filename.
! *
! * Conf: current dictionary.
! * filename: path to the .dict file.
*/
void
NIImportDictionary(IspellDict *Conf, const char *filename)
***************
*** 280,285 **** NIImportDictionary(IspellDict *Conf, const char *filename)
--- 467,473 ----
{
char *s,
*pstr;
+ /* Set of affix flags */
const char *flag;
/* Extract flag from the line */
***************
*** 324,330 **** NIImportDictionary(IspellDict *Conf, const char *filename)
tsearch_readline_end(&trst);
}
!
static int
FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
{
--- 512,541 ----
tsearch_readline_end(&trst);
}
! /*
! * Searches a basic form of word in the prefix tree. This word was generated
! * using an affix rule. This rule may not be present in the affix set of
! * the basic form of the word.
! *
! * For example, we have the entry in the .dict file:
! * meter/GMD
! *
! * The affix rule with the flag S:
! * SFX S y ies [^aeiou]y
! * is not presented here.
! *
! * The affix rule with the flag M:
! * SFX M 0 's .
! * is presented here.
! *
! * Conf: current dictionary.
! * word: basic form of word.
! * affixflag: integer representation of the affix flag, by which a basic form of
! * word was generated.
! * flag: compound flag used to compare with StopMiddle->compoundflag.
! *
! * Returns 1 if the word was found in the prefix tree, else returns 0.
! */
static int
FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
{
***************
*** 349,361 **** FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
{
if (flag == 0)
{
if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
return 0;
}
else if ((flag & StopMiddle->compoundflag) == 0)
return 0;
! if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
return 1;
}
node = StopMiddle->node;
--- 560,581 ----
{
if (flag == 0)
{
+ /*
+ * The word can be formed only with another word.
+ * And in the flag parameter there is not a sign
+ * that we search compound words.
+ */
if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
return 0;
}
else if ((flag & StopMiddle->compoundflag) == 0)
return 0;
! /*
! * Check if this affix rule is presented in the affix set
! * with index StopMiddle->affix.
! */
! if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
return 1;
}
node = StopMiddle->node;
***************
*** 373,378 **** FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
--- 593,616 ----
return 0;
}
+ /*
+ * Adds a new affix rule to the Affix field.
+ *
+ * Conf: current dictionary.
+ * flag: integer representation of the affix flag ('\' in the below example).
+ * flagflags: set of flags from the flagval field for this affix rule. This set
+ * is listed after '/' character in the added string (repl).
+ *
+ * For example L flag in the hunspell_sample.affix:
+ * SFX \ 0 Y/L [^Y]
+ *
+ * mask: condition for search ('[^Y]' in the above example).
+ * find: stripping characters from beginning (at prefix) or end (at suffix)
+ * of the word ('0' in the above example, 0 means that there is not
+ * stripping character).
+ * repl: adding string after stripping ('Y' in the above example).
+ * type: FF_SUFFIX or FF_PREFIX.
+ */
static void
NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type)
{
***************
*** 394,411 **** NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
Affix = Conf->Affix + Conf->naffixes;
! if (strcmp(mask, ".") == 0)
{
Affix->issimple = 1;
Affix->isregis = 0;
}
else if (RS_isRegis(mask))
{
Affix->issimple = 0;
Affix->isregis = 1;
! RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX) ? true : false,
*mask ? mask : VoidString);
}
else
{
int masklen;
--- 632,652 ----
Affix = Conf->Affix + Conf->naffixes;
! /* This affix rule can be applied for words with any ending */
! if (strcmp(mask, ".") == 0 || *mask == '\0')
{
Affix->issimple = 1;
Affix->isregis = 0;
}
+ /* This affix rule will use regis to search word ending */
else if (RS_isRegis(mask))
{
Affix->issimple = 0;
Affix->isregis = 1;
! RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
*mask ? mask : VoidString);
}
+ /* This affix rule will use regex_t to search word ending */
else
{
int masklen;
***************
*** 457,463 **** NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
Conf->naffixes++;
}
-
/* Parsing states for parse_affentry() and friends */
#define PAE_WAIT_MASK 0
#define PAE_INMASK 1
--- 698,703 ----
***************
*** 712,720 **** parse_affentry(char *str, char *mask, char *find, char *repl)
*pmask = *pfind = *prepl = '\0';
! return (*mask && (*find || *repl)) ? true : false;
}
static void
addFlagValue(IspellDict *Conf, char *s, uint32 val)
{
--- 952,967 ----
*pmask = *pfind = *prepl = '\0';
! return (*mask && (*find || *repl));
}
+ /*
+ * Sets up a correspondence for the affix parameter with the affix flag.
+ *
+ * Conf: current dictionary.
+ * s: affix flag in string.
+ * val: affix parameter.
+ */
static void
addFlagValue(IspellDict *Conf, char *s, uint32 val)
{
***************
*** 731,742 **** addFlagValue(IspellDict *Conf, char *s, uint32 val)
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("multibyte flag character is not allowed")));
! Conf->flagval[*(unsigned char *) s] = (unsigned char) val;
Conf->usecompound = true;
}
/*
! * Import an affix file that follows MySpell or Hunspell format
*/
static void
NIImportOOAffixes(IspellDict *Conf, const char *filename)
--- 978,1043 ----
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("multibyte flag character is not allowed")));
! Conf->flagval[DecodeFlag(Conf, s, (char **)NULL)] = (unsigned char) val;
Conf->usecompound = true;
}
/*
! * Returns a set of affix parameters which correspondence to the set of affix
! * flags s.
! */
! static int
! getFlagValues(IspellDict *Conf, char *s)
! {
! uint32 flag = 0;
! char *flagcur;
! char *flagnext = 0;
!
! flagcur = s;
! while (*flagcur)
! {
! flag |= Conf->flagval[DecodeFlag(Conf, flagcur, &flagnext)];
! if (flagnext)
! flagcur = flagnext;
! else
! break;
! }
!
! return flag;
! }
!
! /*
!  * Returns a flag set using the s parameter.
!  *
!  * If Conf->useFlagAliases is true then s is a 1-based alias number; the
!  * matching Conf->AffixData entry is returned, or VoidString when the number
!  * is out of range. Otherwise the s parameter itself is returned.
!  */
! static char *
! getFlags(IspellDict *Conf, char *s)
! {
! 	int			curaffix;
! 	if (Conf->useFlagAliases)
! 	{
! 		curaffix = strtol(s, (char **)NULL, 10);
! 		if (curaffix > 0 && curaffix < Conf->nAffixData)
! 			/*
! 			 * Do not subtract 1 from curaffix
! 			 * because empty string was added in NIImportOOAffixes
! 			 */
! 			return Conf->AffixData[curaffix];
! 		else
! 			return VoidString;
! 	}
! 	else
! 		return s;
! }
!
! /*
! * Import an affix file that follows MySpell or Hunspell format.
! *
! * Conf: current dictionary.
! * filename: path to the .affix file.
*/
static void
NIImportOOAffixes(IspellDict *Conf, const char *filename)
***************
*** 751,757 **** NIImportOOAffixes(IspellDict *Conf, const char *filename)
char repl[BUFSIZ],
*prepl;
bool isSuffix = false;
! int flag = 0;
char flagflags = 0;
tsearch_readline_state trst;
char *recoded;
--- 1052,1061 ----
char repl[BUFSIZ],
*prepl;
bool isSuffix = false;
! int naffix = 0,
! curaffix = 0;
! int flag = 0,
! sflaglen = 0;
char flagflags = 0;
tsearch_readline_state trst;
char *recoded;
***************
*** 759,764 **** NIImportOOAffixes(IspellDict *Conf, const char *filename)
--- 1063,1070 ----
/* read file to find any flag */
memset(Conf->flagval, 0, sizeof(Conf->flagval));
Conf->usecompound = false;
+ Conf->useFlagAliases = false;
+ Conf->flagMode = FM_CHAR;
if (!tsearch_readline_begin(&trst, filename))
ereport(ERROR,
***************
*** 806,815 **** NIImportOOAffixes(IspellDict *Conf, const char *filename)
while (*s && t_isspace(s))
s += pg_mblen(s);
! if (*s && STRNCMP(s, "default") != 0)
! ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
! errmsg("Ispell dictionary supports only default flag value")));
}
pfree(recoded);
--- 1112,1128 ----
while (*s && t_isspace(s))
s += pg_mblen(s);
! if (*s)
! {
! if (STRNCMP(s, "long") == 0)
! Conf->flagMode = FM_LONG;
! else if (STRNCMP(s, "num") == 0)
! Conf->flagMode = FM_NUM;
! else if (STRNCMP(s, "default") != 0)
! ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
! errmsg("Ispell dictionary supports only default, long and num flag value")));
! }
}
pfree(recoded);
***************
*** 834,860 **** NIImportOOAffixes(IspellDict *Conf, const char *filename)
if (ptype)
pfree(ptype);
ptype = lowerstr_ctx(Conf, type);
if (fields_read < 4 ||
(STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
goto nextline;
if (fields_read == 4)
{
! if (strlen(sflag) != 1)
! goto nextline;
! flag = *sflag;
! isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false;
if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
flagflags = FF_CROSSPRODUCT;
else
flagflags = 0;
}
else
{
char *ptr;
int aflg = 0;
! if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
goto nextline;
prepl = lowerstr_ctx(Conf, repl);
/* Find position of '/' in lowercased string "prepl" */
--- 1147,1223 ----
if (ptype)
pfree(ptype);
ptype = lowerstr_ctx(Conf, type);
+
+ /* First try to parse AF parameter (alias compression) */
+ if (STRNCMP(ptype, "af") == 0)
+ {
+ /* First line is the number of aliases */
+ if (!Conf->useFlagAliases)
+ {
+ Conf->useFlagAliases = true;
+ naffix = atoi(sflag);
+ if (naffix == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("invalid number of flag vector aliases")));
+
+ /* Also reserve place for empty flag set */
+ naffix++;
+
+ Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
+ Conf->lenAffixData = Conf->nAffixData = naffix;
+
+ /* Add empty flag set into AffixData */
+ Conf->AffixData[curaffix] = VoidString;
+ curaffix++;
+ }
+ /* Other lines are aliases */
+ else
+ {
+ if (curaffix < naffix)
+ {
+ Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
+ curaffix++;
+ }
+ }
+ goto nextline;
+ }
+ /* Else try to parse prefixes and suffixes */
if (fields_read < 4 ||
(STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
goto nextline;
+ sflaglen = strlen(sflag);
+ if (sflaglen == 0
+ || (sflaglen > 1 && Conf->flagMode == FM_CHAR)
+ || (sflaglen > 2 && Conf->flagMode == FM_LONG))
+ goto nextline;
+
+ /*
+ * Affix header. For example:
+ * SFX \ N 1
+ */
if (fields_read == 4)
{
! /* Convert the affix flag to int */
! flag = DecodeFlag(Conf, sflag, (char **)NULL);
!
! isSuffix = (STRNCMP(ptype, "sfx") == 0);
if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
flagflags = FF_CROSSPRODUCT;
else
flagflags = 0;
}
+ /*
+ * Affix fields. For example:
+ * SFX \ 0 Y/L [^Y]
+ */
else
{
char *ptr;
int aflg = 0;
! if (flag == 0)
goto nextline;
prepl = lowerstr_ctx(Conf, repl);
/* Find position of '/' in lowercased string "prepl" */
***************
*** 866,876 **** NIImportOOAffixes(IspellDict *Conf, const char *filename)
*/
*ptr = '\0';
ptr = repl + (ptr - prepl) + 1;
! while (*ptr)
! {
! aflg |= Conf->flagval[*(unsigned char *) ptr];
! ptr++;
! }
}
pfind = lowerstr_ctx(Conf, find);
pmask = lowerstr_ctx(Conf, mask);
--- 1229,1235 ----
*/
*ptr = '\0';
ptr = repl + (ptr - prepl) + 1;
! aflg |= getFlagValues(Conf, getFlags(Conf, ptr));
}
pfind = lowerstr_ctx(Conf, find);
pmask = lowerstr_ctx(Conf, mask);
***************
*** 928,933 **** NIImportAffixes(IspellDict *Conf, const char *filename)
--- 1287,1294 ----
memset(Conf->flagval, 0, sizeof(Conf->flagval));
Conf->usecompound = false;
+ Conf->useFlagAliases = false;
+ Conf->flagMode = FM_CHAR;
while ((recoded = tsearch_readline(&trst)) != NULL)
{
***************
*** 1044,1049 **** isnewformat:
--- 1405,1415 ----
NIImportOOAffixes(Conf, filename);
}
+ /*
+ * Merges two affix flag sets and stores a new affix flag set into Conf->AffixData.
+ *
+ * Returns index of a new affix flag set.
+ */
static int
MergeAffix(IspellDict *Conf, int a1, int a2)
{
***************
*** 1068,1088 **** MergeAffix(IspellDict *Conf, int a1, int a2)
return Conf->nAffixData - 1;
}
static uint32
makeCompoundFlags(IspellDict *Conf, int affix)
{
! uint32 flag = 0;
! char *str = Conf->AffixData[affix];
!
! while (str && *str)
! {
! flag |= Conf->flagval[*(unsigned char *) str];
! str++;
! }
!
! return (flag & FF_DICTFLAGMASK);
}
static SPNode *
mkSPNode(IspellDict *Conf, int low, int high, int level)
{
--- 1434,1458 ----
return Conf->nAffixData - 1;
}
+ /*
+ * Returns the affix parameters, masked by FF_DICTFLAGMASK, which correspond
+ * to the set of affix flags stored in Conf->AffixData at the given index.
+ */
static uint32
makeCompoundFlags(IspellDict *Conf, int affix)
{
! char *str = Conf->AffixData[affix];
! return (getFlagValues(Conf, str) & FF_DICTFLAGMASK);
}
+ /*
+ * Makes a prefix tree for the given level.
+ *
+ * Conf: current dictionary.
+ * low: lower index of the Conf->Spell array.
+ * high: upper index of the Conf->Spell array.
+ * level: current prefix tree level.
+ */
static SPNode *
mkSPNode(IspellDict *Conf, int low, int high, int level)
{
***************
*** 1115,1120 **** mkSPNode(IspellDict *Conf, int low, int high, int level)
--- 1485,1491 ----
{
if (lastchar)
{
+ /* Next level of the prefix tree */
data->node = mkSPNode(Conf, lownew, i, level + 1);
lownew = i;
data++;
***************
*** 1154,1159 **** mkSPNode(IspellDict *Conf, int low, int high, int level)
--- 1525,1531 ----
}
}
+ /* Next level of the prefix tree */
data->node = mkSPNode(Conf, lownew, high, level + 1);
return rs;
***************
*** 1172,1215 **** NISortDictionary(IspellDict *Conf)
/* compress affixes */
- /* Count the number of different flags used in the dictionary */
-
- qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);
-
- naffix = 0;
- for (i = 0; i < Conf->nspell; i++)
- {
- if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag, MAXFLAGLEN))
- naffix++;
- }
-
/*
! * Fill in Conf->AffixData with the affixes that were used in the
! * dictionary. Replace textual flag-field of Conf->Spell entries with
! * indexes into Conf->AffixData array.
*/
! Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
!
! curaffix = -1;
! for (i = 0; i < Conf->nspell; i++)
{
! if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix], MAXFLAGLEN))
{
! curaffix++;
! Assert(curaffix < naffix);
! Conf->AffixData[curaffix] = cpstrdup(Conf, Conf->Spell[i]->p.flag);
}
-
- Conf->Spell[i]->p.d.affix = curaffix;
- Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
}
! Conf->lenAffixData = Conf->nAffixData = naffix;
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
}
static AffixNode *
mkANode(IspellDict *Conf, int low, int high, int level, int type)
{
--- 1544,1622 ----
/* compress affixes */
/*
! * If we use flag aliases then we need to use Conf->AffixData filled
! * in the NIImportOOAffixes().
*/
! if (Conf->useFlagAliases)
{
! for (i = 0; i < Conf->nspell; i++)
{
! curaffix = strtol(Conf->Spell[i]->p.flag, (char **)NULL, 10);
! if (curaffix && curaffix <= Conf->nAffixData)
! Conf->Spell[i]->p.d.affix = curaffix;
! else
! /*
! * If Conf->Spell[i]->p.flag is empty, then get empty value of
! * Conf->AffixData (0 index).
! */
! Conf->Spell[i]->p.d.affix = 0;
! Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
}
}
+ /* Otherwise fill Conf->AffixData here */
+ else
+ {
+ /* Count the number of different flags used in the dictionary */
+ qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);
+
+ naffix = 0;
+ for (i = 0; i < Conf->nspell; i++)
+ {
+ if (i == 0 || strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag))
+ naffix++;
+ }
! /*
! * Fill in Conf->AffixData with the affixes that were used in the
! * dictionary. Replace textual flag-field of Conf->Spell entries with
! * indexes into Conf->AffixData array.
! */
! Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
!
! curaffix = -1;
! for (i = 0; i < Conf->nspell; i++)
! {
! if (i == 0 || strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]))
! {
! curaffix++;
! Assert(curaffix < naffix);
! Conf->AffixData[curaffix] = cpstrdup(Conf, Conf->Spell[i]->p.flag);
! }
!
! Conf->Spell[i]->p.d.affix = curaffix;
! Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
! }
!
! Conf->lenAffixData = Conf->nAffixData = naffix;
! }
+ /* Start building a prefix tree */
qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
}
+ /*
+ * Makes a prefix tree for the given level using the repl string of an affix rule.
+ * Affixes with an empty replace string are not included in the prefix tree;
+ * these affixes are included by mkVoidAffix().
+ *
+ * Conf: current dictionary.
+ * low: lower index of the Conf->Affix array.
+ * high: upper index of the Conf->Affix array.
+ * level: current prefix tree level.
+ * type: FF_SUFFIX or FF_PREFIX.
+ */
static AffixNode *
mkANode(IspellDict *Conf, int low, int high, int level, int type)
{
***************
*** 1247,1252 **** mkANode(IspellDict *Conf, int low, int high, int level, int type)
--- 1654,1660 ----
{
if (lastchar)
{
+ /* Next level of the prefix tree */
data->node = mkANode(Conf, lownew, i, level + 1, type);
if (naff)
{
***************
*** 1267,1272 **** mkANode(IspellDict *Conf, int low, int high, int level, int type)
--- 1675,1681 ----
}
}
+ /* Next level of the prefix tree */
data->node = mkANode(Conf, lownew, high, level + 1, type);
if (naff)
{
***************
*** 1281,1286 **** mkANode(IspellDict *Conf, int low, int high, int level, int type)
--- 1690,1699 ----
return rs;
}
+ /*
+ * Makes the root void node in the prefix tree. The root void node is created
+ * for affixes which have empty replace string ("repl" field).
+ */
static void
mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
{
***************
*** 1304,1314 **** mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
Conf->Prefix = Affix;
}
!
for (i = start; i < end; i++)
if (Conf->Affix[i].replen == 0)
cnt++;
if (cnt == 0)
return;
--- 1717,1728 ----
Conf->Prefix = Affix;
}
! /* Count affixes with empty replace string */
for (i = start; i < end; i++)
if (Conf->Affix[i].replen == 0)
cnt++;
+ /* There are no affixes with an empty replace string */
if (cnt == 0)
return;
***************
*** 1324,1341 **** mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
}
}
static bool
! isAffixInUse(IspellDict *Conf, char flag)
{
int i;
for (i = 0; i < Conf->nAffixData; i++)
! if (strchr(Conf->AffixData[i], flag) != NULL)
return true;
return false;
}
void
NISortAffixes(IspellDict *Conf)
{
--- 1738,1768 ----
}
}
+ /*
+ * Checks if the affixflag is used by the dictionary. Conf->AffixData does not
+ * contain affixflag if this flag is not actually used by the .dict file.
+ *
+ * Conf: current dictionary.
+ * affixflag: integer representation of the affix flag.
+ *
+ * Returns true if the Conf->AffixData array contains affixflag, otherwise
+ * returns false.
+ */
static bool
! isAffixInUse(IspellDict *Conf, unsigned short affixflag)
{
int i;
for (i = 0; i < Conf->nAffixData; i++)
! if (IsAffixFlagInUse(Conf, i, affixflag))
return true;
return false;
}
+ /*
+ * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
+ */
void
NISortAffixes(IspellDict *Conf)
{
***************
*** 1347,1352 **** NISortAffixes(IspellDict *Conf)
--- 1774,1780 ----
if (Conf->naffixes == 0)
return;
+ /* Store compound affixes in the Conf->CompoundAffix array */
if (Conf->naffixes > 1)
qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
***************
*** 1359,1365 **** NISortAffixes(IspellDict *Conf)
firstsuffix = i;
if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
! isAffixInUse(Conf, (char) Affix->flag))
{
if (ptr == Conf->CompoundAffix ||
ptr->issuffix != (ptr - 1)->issuffix ||
--- 1787,1793 ----
firstsuffix = i;
if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
! isAffixInUse(Conf, Affix->flag))
{
if (ptr == Conf->CompoundAffix ||
ptr->issuffix != (ptr - 1)->issuffix ||
***************
*** 1370,1376 **** NISortAffixes(IspellDict *Conf)
/* leave only unique and minimals suffixes */
ptr->affix = Affix->repl;
ptr->len = Affix->replen;
! ptr->issuffix = (Affix->type == FF_SUFFIX) ? true : false;
ptr++;
}
}
--- 1798,1804 ----
/* leave only unique and minimals suffixes */
ptr->affix = Affix->repl;
ptr->len = Affix->replen;
! ptr->issuffix = (Affix->type == FF_SUFFIX);
ptr++;
}
}
***************
*** 1378,1383 **** NISortAffixes(IspellDict *Conf)
--- 1806,1812 ----
ptr->affix = NULL;
Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
+ /* Start building a prefix tree */
Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
mkVoidAffix(Conf, true, firstsuffix);
***************
*** 1825,1831 **** SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
if (StopLow < StopHigh)
{
! if (level == FF_COMPOUNDBEGIN)
compoundflag = FF_COMPOUNDBEGIN;
else if (level == wordlen - 1)
compoundflag = FF_COMPOUNDLAST;
--- 2254,2260 ----
if (StopLow < StopHigh)
{
! if (startpos == 0)
compoundflag = FF_COMPOUNDBEGIN;
else if (level == wordlen - 1)
compoundflag = FF_COMPOUNDLAST;
*** a/src/backend/tsearch/synonym_sample.syn
--- /dev/null
***************
*** 1,5 ****
- postgres pgsql
- postgresql pgsql
- postgre pgsql
- gogle googl
- indices index*
--- 0 ----
*** a/src/backend/tsearch/thesaurus_sample.ths
--- /dev/null
***************
*** 1,17 ****
- #
- # Theasurus config file. Character ':' separates string from replacement, eg
- # sample-words : substitute-words
- #
- # Any substitute-word can be marked by preceding '*' character,
- # which means do not lexize this word
- # Docs: http://www.sai.msu.su/~megera/oddmuse/index.cgi/Thesaurus_dictionary
-
- one two three : *123
- one two : *12
- one : *1
- two : *2
-
- supernovae stars : *sn
- supernovae : *sn
- booking tickets : order invitation cards
- booking ? tickets : order invitation Cards
--- 0 ----
*** a/src/include/tsearch/dicts/spell.h
--- b/src/include/tsearch/dicts/spell.h
***************
*** 19,36 ****
#include "tsearch/ts_public.h"
/*
! * Max length of a flag name. Names longer than this will be truncated
! * to the maximum.
*/
- #define MAXFLAGLEN 16
-
struct SPNode;
typedef struct
{
uint32 val:8,
isword:1,
compoundflag:4,
affix:19;
struct SPNode *node;
} SPNodeData;
--- 19,36 ----
#include "tsearch/ts_public.h"
/*
! * SPNode and SPNodeData are used to represent a prefix tree (trie) that
! * stores a list of words.
*/
struct SPNode;
typedef struct
{
uint32 val:8,
isword:1,
+ /* Stores compound flags listed below */
compoundflag:4,
+ /* Reference to an entry of the AffixData field */
affix:19;
struct SPNode *node;
} SPNodeData;
***************
*** 54,72 **** typedef struct SPNode
#define SPNHDRSZ (offsetof(SPNode,data))
!
typedef struct spell_struct
{
union
{
/*
! * flag is filled in by NIImportDictionary. After NISortDictionary, d
! * is valid and flag is invalid.
*/
! char flag[MAXFLAGLEN];
struct
{
int affix;
int len;
} d;
} p;
--- 54,77 ----
#define SPNHDRSZ (offsetof(SPNode,data))
! /*
! * Represents an entry in a word list.
! */
typedef struct spell_struct
{
union
{
/*
! * flag is filled in by NIImportDictionary(). After NISortDictionary(), d
! * is used instead of flag.
*/
! char *flag;
! /* d is used in mkSPNode() */
struct
{
+ /* Reference to an entry of the AffixData field */
int affix;
+ /* Length of the word */
int len;
} d;
} p;
***************
*** 75,84 **** typedef struct spell_struct
#define SPELLHDRSZ (offsetof(SPELL, word))
typedef struct aff_struct
{
! uint32 flag:8,
! type:1,
flagflags:7,
issimple:1,
isregis:1,
--- 80,93 ----
#define SPELLHDRSZ (offsetof(SPELL, word))
+ /*
+ * Represents an entry in an affix list.
+ */
typedef struct aff_struct
{
! uint32 flag:16;
! /* FF_SUFFIX or FF_PREFIX */
! uint32 type:1,
flagflags:7,
issimple:1,
isregis:1,
***************
*** 106,111 **** typedef struct aff_struct
--- 115,124 ----
#define FF_SUFFIX 1
#define FF_PREFIX 0
+ /*
+ * AffixNode and AffixNodeData are used to represent a prefix tree (trie) that
+ * stores a list of affixes.
+ */
struct AffixNode;
typedef struct
***************
*** 132,137 **** typedef struct
--- 145,160 ----
bool issuffix;
} CMPDAffix;
+ typedef enum
+ {
+ FM_CHAR,
+ FM_LONG,
+ FM_NUM
+ } FlagMode;
+
+ #define FLAGCHAR_MAXSIZE 255
+ #define FLAGNUM_MAXSIZE 65535
+
typedef struct
{
int maffixes;
***************
*** 142,155 **** typedef struct
AffixNode *Prefix;
SPNode *Dictionary;
char **AffixData;
int lenAffixData;
int nAffixData;
CMPDAffix *CompoundAffix;
! unsigned char flagval[256];
bool usecompound;
/*
* Remaining fields are only used during dictionary construction; they are
--- 165,181 ----
AffixNode *Prefix;
SPNode *Dictionary;
+ /* Array of sets of affixes */
char **AffixData;
int lenAffixData;
int nAffixData;
+ bool useFlagAliases;
CMPDAffix *CompoundAffix;
! unsigned char flagval[FLAGNUM_MAXSIZE];
bool usecompound;
+ FlagMode flagMode;
/*
* Remaining fields are only used during dictionary construction; they are
*** a/src/test/regress/expected/tsdicts.out
--- b/src/test/regress/expected/tsdicts.out
***************
*** 191,196 **** SELECT ts_lexize('hunspell', 'footballyklubber');
--- 191,388 ----
{foot,ball,klubber}
(1 row)
+ -- Test ISpell dictionary with hunspell affix file with FLAG long parameter
+ CREATE TEXT SEARCH DICTIONARY hunspell_long (
+ Template=ispell,
+ DictFile=hunspell_sample_long,
+ AffFile=hunspell_sample_long
+ );
+ SELECT ts_lexize('hunspell_long', 'skies');
+ ts_lexize
+ -----------
+ {sky}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'bookings');
+ ts_lexize
+ ----------------
+ {booking,book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'booking');
+ ts_lexize
+ ----------------
+ {booking,book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'foot');
+ ts_lexize
+ -----------
+ {foot}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'foots');
+ ts_lexize
+ -----------
+ {foot}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'rebookings');
+ ts_lexize
+ ----------------
+ {booking,book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'rebooking');
+ ts_lexize
+ ----------------
+ {booking,book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'rebook');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'unbookings');
+ ts_lexize
+ -----------
+ {book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'unbooking');
+ ts_lexize
+ -----------
+ {book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'unbook');
+ ts_lexize
+ -----------
+ {book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'footklubber');
+ ts_lexize
+ ----------------
+ {foot,klubber}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'footballklubber');
+ ts_lexize
+ ------------------------------------------------------
+ {footballklubber,foot,ball,klubber,football,klubber}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'ballyklubber');
+ ts_lexize
+ ----------------
+ {ball,klubber}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_long', 'footballyklubber');
+ ts_lexize
+ ---------------------
+ {foot,ball,klubber}
+ (1 row)
+
+ -- Test ISpell dictionary with hunspell affix file with FLAG num parameter
+ CREATE TEXT SEARCH DICTIONARY hunspell_num (
+ Template=ispell,
+ DictFile=hunspell_sample_num,
+ AffFile=hunspell_sample_num
+ );
+ SELECT ts_lexize('hunspell_num', 'skies');
+ ts_lexize
+ -----------
+ {sky}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'bookings');
+ ts_lexize
+ ----------------
+ {booking,book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'booking');
+ ts_lexize
+ ----------------
+ {booking,book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'foot');
+ ts_lexize
+ -----------
+ {foot}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'foots');
+ ts_lexize
+ -----------
+ {foot}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'rebookings');
+ ts_lexize
+ ----------------
+ {booking,book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'rebooking');
+ ts_lexize
+ ----------------
+ {booking,book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'rebook');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'unbookings');
+ ts_lexize
+ -----------
+ {book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'unbooking');
+ ts_lexize
+ -----------
+ {book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'unbook');
+ ts_lexize
+ -----------
+ {book}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'footklubber');
+ ts_lexize
+ ----------------
+ {foot,klubber}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'footballklubber');
+ ts_lexize
+ ------------------------------------------------------
+ {footballklubber,foot,ball,klubber,football,klubber}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'ballyklubber');
+ ts_lexize
+ ----------------
+ {ball,klubber}
+ (1 row)
+
+ SELECT ts_lexize('hunspell_num', 'footballyklubber');
+ ts_lexize
+ ---------------------
+ {foot,ball,klubber}
+ (1 row)
+
-- Synonim dictionary
CREATE TEXT SEARCH DICTIONARY synonym (
Template=synonym,
***************
*** 277,282 **** SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
--- 469,516 ----
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
(1 row)
+ -- Test ispell dictionary with hunspell affix with FLAG long in configuration
+ ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
+ REPLACE hunspell WITH hunspell_long;
+ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
+ to_tsvector
+ ----------------------------------------------------------------------------------------------------
+ 'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
+ (1 row)
+
+ SELECT to_tsquery('hunspell_tst', 'footballklubber');
+ to_tsquery
+ ------------------------------------------------------------------------------
+ ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+ (1 row)
+
+ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
+ to_tsquery
+ ------------------------------------------------------------------------
+ 'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
+ (1 row)
+
+ -- Test ispell dictionary with hunspell affix with FLAG num in configuration
+ ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
+ REPLACE hunspell_long WITH hunspell_num;
+ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
+ to_tsvector
+ ----------------------------------------------------------------------------------------------------
+ 'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
+ (1 row)
+
+ SELECT to_tsquery('hunspell_tst', 'footballklubber');
+ to_tsquery
+ ------------------------------------------------------------------------------
+ ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+ (1 row)
+
+ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
+ to_tsquery
+ ------------------------------------------------------------------------
+ 'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
+ (1 row)
+
-- Test synonym dictionary in configuration
CREATE TEXT SEARCH CONFIGURATION synonym_tst (
COPY=english
*** a/src/test/regress/sql/tsdicts.sql
--- b/src/test/regress/sql/tsdicts.sql
***************
*** 48,53 **** SELECT ts_lexize('hunspell', 'footballklubber');
--- 48,101 ----
SELECT ts_lexize('hunspell', 'ballyklubber');
SELECT ts_lexize('hunspell', 'footballyklubber');
+ -- Test ISpell dictionary with hunspell affix file with FLAG long parameter
+ CREATE TEXT SEARCH DICTIONARY hunspell_long (
+ Template=ispell,
+ DictFile=hunspell_sample_long,
+ AffFile=hunspell_sample_long
+ );
+
+ SELECT ts_lexize('hunspell_long', 'skies');
+ SELECT ts_lexize('hunspell_long', 'bookings');
+ SELECT ts_lexize('hunspell_long', 'booking');
+ SELECT ts_lexize('hunspell_long', 'foot');
+ SELECT ts_lexize('hunspell_long', 'foots');
+ SELECT ts_lexize('hunspell_long', 'rebookings');
+ SELECT ts_lexize('hunspell_long', 'rebooking');
+ SELECT ts_lexize('hunspell_long', 'rebook');
+ SELECT ts_lexize('hunspell_long', 'unbookings');
+ SELECT ts_lexize('hunspell_long', 'unbooking');
+ SELECT ts_lexize('hunspell_long', 'unbook');
+
+ SELECT ts_lexize('hunspell_long', 'footklubber');
+ SELECT ts_lexize('hunspell_long', 'footballklubber');
+ SELECT ts_lexize('hunspell_long', 'ballyklubber');
+ SELECT ts_lexize('hunspell_long', 'footballyklubber');
+
+ -- Test ISpell dictionary with hunspell affix file with FLAG num parameter
+ CREATE TEXT SEARCH DICTIONARY hunspell_num (
+ Template=ispell,
+ DictFile=hunspell_sample_num,
+ AffFile=hunspell_sample_num
+ );
+
+ SELECT ts_lexize('hunspell_num', 'skies');
+ SELECT ts_lexize('hunspell_num', 'bookings');
+ SELECT ts_lexize('hunspell_num', 'booking');
+ SELECT ts_lexize('hunspell_num', 'foot');
+ SELECT ts_lexize('hunspell_num', 'foots');
+ SELECT ts_lexize('hunspell_num', 'rebookings');
+ SELECT ts_lexize('hunspell_num', 'rebooking');
+ SELECT ts_lexize('hunspell_num', 'rebook');
+ SELECT ts_lexize('hunspell_num', 'unbookings');
+ SELECT ts_lexize('hunspell_num', 'unbooking');
+ SELECT ts_lexize('hunspell_num', 'unbook');
+
+ SELECT ts_lexize('hunspell_num', 'footklubber');
+ SELECT ts_lexize('hunspell_num', 'footballklubber');
+ SELECT ts_lexize('hunspell_num', 'ballyklubber');
+ SELECT ts_lexize('hunspell_num', 'footballyklubber');
+
-- Synonim dictionary
CREATE TEXT SEARCH DICTIONARY synonym (
Template=synonym,
***************
*** 94,99 **** SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
--- 142,163 ----
SELECT to_tsquery('hunspell_tst', 'footballklubber');
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
+ -- Test ispell dictionary with hunspell affix with FLAG long in configuration
+ ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
+ REPLACE hunspell WITH hunspell_long;
+
+ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
+ SELECT to_tsquery('hunspell_tst', 'footballklubber');
+ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
+
+ -- Test ispell dictionary with hunspell affix with FLAG num in configuration
+ ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
+ REPLACE hunspell_long WITH hunspell_num;
+
+ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
+ SELECT to_tsquery('hunspell_tst', 'footballklubber');
+ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
+
-- Test synonym dictionary in configuration
CREATE TEXT SEARCH CONFIGURATION synonym_tst (
COPY=english