diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 610b7bf033..f3288fbb3f 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -3030,6 +3030,25 @@ CREATE TEXT SEARCH DICTIONARY english_stem ( + + Dictionaries in Shared Memory + + + Some dictionaries, especially Ispell, consume + a significant amount of memory, in some cases tens of megabytes. Most of + them store the data in text files, and building the in-memory structure is + both CPU-intensive and time-consuming. Instead of doing this in each backend when + it needs a dictionary for the first time, the compiled dictionary may be + stored in shared memory so that it may be reused by other backends. + + + + To enable storing dictionaries in shared memory, set the + max_shared_dictionaries_size parameter to a value greater than zero or to -1. + + + + diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index 6d0dedbefb..6294a52af3 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -5,6 +5,15 @@ * * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group * + * By default all Ispell dictionaries are stored in DSM. But if the amount + * of memory exceeds max_shared_dictionaries_size, then the dictionary will be + * allocated in private backend memory (in dictCtx context). + * + * All necessary data are built within dispell_build() function. But + * structures for regular expressions are compiled on first demand and + * stored using AffixReg array. This is because regex_t and Regis cannot be + * stored in shared memory easily. 
+ * * * IDENTIFICATION * src/backend/tsearch/dict_ispell.c @@ -14,8 +23,10 @@ #include "postgres.h" #include "commands/defrem.h" +#include "storage/dsm.h" #include "tsearch/dicts/spell.h" #include "tsearch/ts_locale.h" +#include "tsearch/ts_shared.h" #include "tsearch/ts_utils.h" #include "utils/builtins.h" @@ -26,54 +37,126 @@ typedef struct IspellDict obj; } DictISpell; +static void parse_dictoptions(List *dictoptions, + char **dictfile, char **afffile, char **stopfile); +static void *dispell_build(List *dictoptions, Size *size); + Datum dispell_init(PG_FUNCTION_ARGS) { DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictISpell *d; - bool affloaded = false, - dictloaded = false, - stoploaded = false; - ListCell *l; + void *dict_location; + char *stopfile; d = (DictISpell *) palloc0(sizeof(DictISpell)); - NIStartBuild(&(d->obj)); + parse_dictoptions(init_data->dictoptions, NULL, NULL, &stopfile); + + if (stopfile) + readstoplist(stopfile, &(d->stoplist), lowerstr); + + dict_location = ts_dict_shmem_location(init_data, dispell_build); + Assert(dict_location); + + d->obj.dict = (IspellDictData *) dict_location; + d->obj.reg = (AffixReg *) palloc0(d->obj.dict->nAffix * + sizeof(AffixReg)); + /* Current memory context is dictionary's private memory context */ + d->obj.dictCtx = CurrentMemoryContext; + + PG_RETURN_POINTER(d); +} + +Datum +dispell_lexize(PG_FUNCTION_ARGS) +{ + DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0); + char *in = (char *) PG_GETARG_POINTER(1); + int32 len = PG_GETARG_INT32(2); + char *txt; + TSLexeme *res; + TSLexeme *ptr, + *cptr; + + if (len <= 0) + PG_RETURN_POINTER(NULL); + + txt = lowerstr_with_len(in, len); + res = NINormalizeWord(&(d->obj), txt); + + if (res == NULL) + PG_RETURN_POINTER(NULL); + + cptr = res; + for (ptr = cptr; ptr->lexeme; ptr++) + { + if (searchstoplist(&(d->stoplist), ptr->lexeme)) + { + pfree(ptr->lexeme); + ptr->lexeme = NULL; + } + else + { + if (cptr != ptr) + memcpy(cptr, ptr, 
sizeof(TSLexeme)); + cptr++; + } + } + cptr->lexeme = NULL; + + PG_RETURN_POINTER(res); +} + +static void +parse_dictoptions(List *dictoptions, char **dictfile, char **afffile, + char **stopfile) +{ + ListCell *l; - foreach(l, init_data->dictoptions) + if (dictfile) + *dictfile = NULL; + if (afffile) + *afffile = NULL; + if (stopfile) + *stopfile = NULL; + + foreach(l, dictoptions) { DefElem *defel = (DefElem *) lfirst(l); if (strcmp(defel->defname, "dictfile") == 0) { - if (dictloaded) + if (!dictfile) + continue; + + if (*dictfile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple DictFile parameters"))); - NIImportDictionary(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "dict")); - dictloaded = true; + *dictfile = get_tsearch_config_filename(defGetString(defel), "dict"); } else if (strcmp(defel->defname, "afffile") == 0) { - if (affloaded) + if (!afffile) + continue; + + if (*afffile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple AffFile parameters"))); - NIImportAffixes(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "affix")); - affloaded = true; + *afffile = get_tsearch_config_filename(defGetString(defel), "affix"); } else if (strcmp(defel->defname, "stopwords") == 0) { - if (stoploaded) + if (!stopfile) + continue; + + if (*stopfile) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple StopWords parameters"))); - readstoplist(defGetString(defel), &(d->stoplist), lowerstr); - stoploaded = true; + *stopfile = defGetString(defel); } else { @@ -83,66 +166,52 @@ dispell_init(PG_FUNCTION_ARGS) defel->defname))); } } +} - if (affloaded && dictloaded) - { - NISortDictionary(&(d->obj)); - NISortAffixes(&(d->obj)); - } - else if (!affloaded) +/* + * Build the dictionary. + * + * Result is palloc'ed. 
+ */ +static void * +dispell_build(List *dictoptions, Size *size) +{ + IspellDictBuild build; + char *dictfile, + *afffile; + + parse_dictoptions(dictoptions, &dictfile, &afffile, NULL); + + if (!afffile) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("missing AffFile parameter"))); } - else + else if (!dictfile) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("missing DictFile parameter"))); } - NIFinishBuild(&(d->obj)); + MemSet(&build, 0, sizeof(build)); + NIStartBuild(&build); - PG_RETURN_POINTER(d); -} + /* Read files */ + NIImportDictionary(&build, dictfile); + NIImportAffixes(&build, afffile); -Datum -dispell_lexize(PG_FUNCTION_ARGS) -{ - DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0); - char *in = (char *) PG_GETARG_POINTER(1); - int32 len = PG_GETARG_INT32(2); - char *txt; - TSLexeme *res; - TSLexeme *ptr, - *cptr; + /* Build persistent data to use by backends */ + NISortDictionary(&build); + NISortAffixes(&build); - if (len <= 0) - PG_RETURN_POINTER(NULL); + NICopyData(&build); - txt = lowerstr_with_len(in, len); - res = NINormalizeWord(&(d->obj), txt); - - if (res == NULL) - PG_RETURN_POINTER(NULL); + /* Release temporary data */ + NIFinishBuild(&build); - cptr = res; - for (ptr = cptr; ptr->lexeme; ptr++) - { - if (searchstoplist(&(d->stoplist), ptr->lexeme)) - { - pfree(ptr->lexeme); - ptr->lexeme = NULL; - } - else - { - if (cptr != ptr) - memcpy(cptr, ptr, sizeof(TSLexeme)); - cptr++; - } - } - cptr->lexeme = NULL; - - PG_RETURN_POINTER(res); + /* Return the buffer and its size */ + *size = build.dict_size; + return build.dict; } diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index e071994523..1c560ef56a 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -23,33 +23,35 @@ * Compilation of a dictionary * --------------------------- * - * A compiled dictionary is stored in the IspellDict structure. 
Compilation of - * a dictionary is divided into the several steps: + * A compiled dictionary is stored in the following structures: + * - IspellDictBuild - stores temporary data and IspellDictData + * - IspellDictData - stores permanent data used within NINormalizeWord() + * Compilation of the dictionary is divided into several steps: * - NIImportDictionary() - stores each word of a .dict file in the * temporary Spell field. - * - NIImportAffixes() - stores affix rules of an .affix file in the - * Affix field (not temporary) if an .affix file has the Ispell format. + * - NIImportAffixes() - stores affix rules of an .affix file in the temporary + * Affix field if an .affix file has the Ispell format. * -> NIImportOOAffixes() - stores affix rules if an .affix file has the * Hunspell format. The AffixData field is initialized if AF parameter * is defined. * - NISortDictionary() - builds a prefix tree (Trie) from the words list - * and stores it in the Dictionary field. The words list is got from the + * and stores it in the DictNodes field. The words list is got from the * Spell field. The AffixData field is initialized if AF parameter is not * defined. * - NISortAffixes(): * - builds a list of compound affixes from the affix list and stores it * in the CompoundAffix. * - builds prefix trees (Trie) from the affix list for prefixes and suffixes - * and stores them in Suffix and Prefix fields. + * and stores them in SuffixNodes and PrefixNodes fields. * The affix list is got from the Affix field. + * Persistent data of the dictionary is copied within NICopyData(). * * Memory management * ----------------- * - * The IspellDict structure has the Spell field which is used only in compile - * time. The Spell field stores a words list. It can take a lot of memory. - * Therefore when a dictionary is compiled this field is cleared by - * NIFinishBuild(). + * The IspellDictBuild structure has the temporary data which is used only at + * compile time. 
It can take a lot of memory. Therefore after compiling the + * dictionary this data is cleared by NIFinishBuild(). * * All resources which should cleared by NIFinishBuild() is initialized using * tmpalloc() and tmpalloc0(). @@ -73,112 +75,147 @@ * after the initialization is done. During initialization, * CurrentMemoryContext is the long-lived memory context associated * with the dictionary cache entry. We keep the short-lived stuff - * in the Conf->buildCxt context. + * in the ConfBuild->buildCxt context. */ -#define tmpalloc(sz) MemoryContextAlloc(Conf->buildCxt, (sz)) -#define tmpalloc0(sz) MemoryContextAllocZero(Conf->buildCxt, (sz)) +#define tmpalloc(sz) MemoryContextAlloc(ConfBuild->buildCxt, (sz)) +#define tmpalloc0(sz) MemoryContextAllocZero(ConfBuild->buildCxt, (sz)) -#define tmpstrdup(str) MemoryContextStrdup(Conf->buildCxt, (str)) +#define tmpstrdup(str) MemoryContextStrdup(ConfBuild->buildCxt, (str)) /* * Prepare for constructing an ISpell dictionary. * - * The IspellDict struct is assumed to be zeroed when allocated. + * The IspellDictBuild struct is assumed to be zeroed when allocated. */ void -NIStartBuild(IspellDict *Conf) +NIStartBuild(IspellDictBuild *ConfBuild) { + uint32 dict_size; + /* * The temp context is a child of CurTransactionContext, so that it will * go away automatically on error. */ - Conf->buildCxt = AllocSetContextCreate(CurTransactionContext, - "Ispell dictionary init context", - ALLOCSET_DEFAULT_SIZES); + ConfBuild->buildCxt = AllocSetContextCreate(CurTransactionContext, + "Ispell dictionary init context", + ALLOCSET_DEFAULT_SIZES); + + /* + * Allocate buffer for the dictionary in current context not in buildCxt. + */ + dict_size = MAXALIGN(IspellDictDataHdrSize); + ConfBuild->dict = palloc0(dict_size); + ConfBuild->dict_size = dict_size; } /* - * Clean up when dictionary construction is complete. + * Copy compiled and persistent data into IspellDictData. 
*/ void -NIFinishBuild(IspellDict *Conf) +NICopyData(IspellDictBuild *ConfBuild) { - /* Release no-longer-needed temp memory */ - MemoryContextDelete(Conf->buildCxt); - /* Just for cleanliness, zero the now-dangling pointers */ - Conf->buildCxt = NULL; - Conf->Spell = NULL; - Conf->firstfree = NULL; - Conf->CompoundAffixFlags = NULL; -} + IspellDictData *dict; + uint32 size; + int i; + uint32 *offsets, + offset; + /* + * Calculate necessary space + */ + size = ConfBuild->nAffixData * sizeof(uint32); + size += ConfBuild->AffixDataEnd; -/* - * "Compact" palloc: allocate without extra palloc overhead. - * - * Since we have no need to free the ispell data items individually, there's - * not much value in the per-chunk overhead normally consumed by palloc. - * Getting rid of it is helpful since ispell can allocate a lot of small nodes. - * - * We currently pre-zero all data allocated this way, even though some of it - * doesn't need that. The cpalloc and cpalloc0 macros are just documentation - * to indicate which allocations actually require zeroing. 
- */ -#define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */ -#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */ + size += ConfBuild->nAffix * sizeof(uint32); + size += ConfBuild->AffixSize; -static void * -compact_palloc0(IspellDict *Conf, size_t size) -{ - void *result; + size += ConfBuild->DictNodes.NodesEnd; + size += ConfBuild->PrefixNodes.NodesEnd; + size += ConfBuild->SuffixNodes.NodesEnd; - /* Should only be called during init */ - Assert(Conf->buildCxt != NULL); + size += sizeof(CMPDAffix) * ConfBuild->nCompoundAffix; - /* No point in this for large chunks */ - if (size > COMPACT_MAX_REQ) - return palloc0(size); + /* + * Copy data itself + */ + ConfBuild->dict_size = IspellDictDataHdrSize + size; + ConfBuild->dict = repalloc(ConfBuild->dict, ConfBuild->dict_size); + + dict = ConfBuild->dict; + + /* AffixData */ + dict->nAffixData = ConfBuild->nAffixData; + dict->AffixDataStart = sizeof(uint32) * ConfBuild->nAffixData; + memcpy(DictAffixDataOffset(dict), ConfBuild->AffixDataOffset, + sizeof(uint32) * ConfBuild->nAffixData); + memcpy(DictAffixData(dict), ConfBuild->AffixData, ConfBuild->AffixDataEnd); + + /* Affix array */ + dict->nAffix = ConfBuild->nAffix; + dict->AffixOffsetStart = dict->AffixDataStart + ConfBuild->AffixDataEnd; + dict->AffixStart = dict->AffixOffsetStart + sizeof(uint32) * ConfBuild->nAffix; + offsets = (uint32 *) DictAffixOffset(dict); + offset = 0; + for (i = 0; i < ConfBuild->nAffix; i++) + { + AFFIX *affix; + uint32 size = AffixGetSize(ConfBuild->Affix[i]); - /* Keep everything maxaligned */ - size = MAXALIGN(size); + offsets[i] = offset; + affix = (AFFIX *) DictAffixGet(dict, i); + Assert(affix); - /* Need more space? 
*/ - if (size > Conf->avail) - { - Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK); - Conf->avail = COMPACT_ALLOC_CHUNK; - } + memcpy(affix, ConfBuild->Affix[i], size); - result = (void *) Conf->firstfree; - Conf->firstfree += size; - Conf->avail -= size; + offset += size; + } - return result; + /* DictNodes prefix tree */ + dict->DictNodesStart = dict->AffixStart + offset; + memcpy(DictDictNodes(dict), ConfBuild->DictNodes.Nodes, + ConfBuild->DictNodes.NodesEnd); + + /* PrefixNodes prefix tree */ + dict->PrefixNodesStart = dict->DictNodesStart + ConfBuild->DictNodes.NodesEnd; + memcpy(DictPrefixNodes(dict), ConfBuild->PrefixNodes.Nodes, + ConfBuild->PrefixNodes.NodesEnd); + + /* SuffixNodes prefix tree */ + dict->SuffixNodesStart = dict->PrefixNodesStart + ConfBuild->PrefixNodes.NodesEnd; + memcpy(DictSuffixNodes(dict), ConfBuild->SuffixNodes.Nodes, + ConfBuild->SuffixNodes.NodesEnd); + + /* CompoundAffix array */ + dict->CompoundAffixStart = dict->SuffixNodesStart + + ConfBuild->SuffixNodes.NodesEnd; + memcpy(DictCompoundAffix(dict), ConfBuild->CompoundAffix, + sizeof(CMPDAffix) * ConfBuild->nCompoundAffix); } -#define cpalloc(size) compact_palloc0(Conf, size) -#define cpalloc0(size) compact_palloc0(Conf, size) - -static char * -cpstrdup(IspellDict *Conf, const char *str) +/* + * Clean up when dictionary construction is complete. + */ +void +NIFinishBuild(IspellDictBuild *ConfBuild) { - char *res = cpalloc(strlen(str) + 1); - - strcpy(res, str); - return res; + /* Release no-longer-needed temp memory */ + MemoryContextDelete(ConfBuild->buildCxt); + /* Just for cleanliness, zero the now-dangling pointers */ + ConfBuild->buildCxt = NULL; + ConfBuild->Spell = NULL; + ConfBuild->CompoundAffixFlags = NULL; } - /* * Apply lowerstr(), producing a temporary result (in the buildCxt). 
*/ static char * -lowerstr_ctx(IspellDict *Conf, const char *src) +lowerstr_ctx(IspellDictBuild *ConfBuild, const char *src) { MemoryContext saveCtx; char *dst; - saveCtx = MemoryContextSwitchTo(Conf->buildCxt); + saveCtx = MemoryContextSwitchTo(ConfBuild->buildCxt); dst = lowerstr(src); MemoryContextSwitchTo(saveCtx); @@ -190,7 +227,7 @@ lowerstr_ctx(IspellDict *Conf, const char *src) #define STRNCMP(s,p) strncmp( (s), (p), strlen(p) ) #define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] ) -#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T ) +#define GETCHAR(A,N,T) GETWCHAR( AffixFieldRepl(A), (A)->replen, N, T ) static char *VoidString = ""; @@ -311,18 +348,189 @@ strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count) static int cmpaffix(const void *s1, const void *s2) { - const AFFIX *a1 = (const AFFIX *) s1; - const AFFIX *a2 = (const AFFIX *) s2; + const AFFIX *a1 = *((AFFIX *const *) s1); + const AFFIX *a2 = *((AFFIX *const *) s2); if (a1->type < a2->type) return -1; if (a1->type > a2->type) return 1; if (a1->type == FF_PREFIX) - return strcmp(a1->repl, a2->repl); + return strcmp(AffixFieldRepl(a1), AffixFieldRepl(a2)); else - return strbcmp((const unsigned char *) a1->repl, - (const unsigned char *) a2->repl); + return strbcmp((const unsigned char *) AffixFieldRepl(a1), + (const unsigned char *) AffixFieldRepl(a2)); +} + +/* + * Allocate space for AffixData. + */ +static void +InitAffixData(IspellDictBuild *ConfBuild, int numAffixData) +{ + uint32 size; + + size = 8 * 1024 /* Reserve 8KB for data */; + + ConfBuild->AffixData = (char *) tmpalloc(size); + ConfBuild->AffixDataSize = size; + ConfBuild->AffixDataOffset = (uint32 *) tmpalloc(numAffixData * sizeof(uint32)); + ConfBuild->nAffixData = 0; + ConfBuild->mAffixData= numAffixData; + + /* Save offset of the end of data */ + ConfBuild->AffixDataEnd = 0; +} + +/* + * Add affix set of affix flags into IspellDict struct. 
If IspellDict doesn't + * fit new affix set then resize it. + * + * ConfBuild: building structure for the current dictionary. + * AffixSet: set of affix flags. + */ +static void +AddAffixSet(IspellDictBuild *ConfBuild, const char *AffixSet, + uint32 AffixSetLen) +{ + /* + * Check available space for AffixSet. + */ + if (ConfBuild->AffixDataEnd + AffixSetLen + 1 /* \0 */ >= + ConfBuild->AffixDataSize) + { + uint32 newsize = Max(ConfBuild->AffixDataSize + 8 * 1024 /* 8KB */, + ConfBuild->AffixDataSize + AffixSetLen + 1); + + ConfBuild->AffixData = (char *) repalloc(ConfBuild->AffixData, newsize); + ConfBuild->AffixDataSize = newsize; + } + + /* Check available number of offsets */ + if (ConfBuild->nAffixData >= ConfBuild->mAffixData) + { + ConfBuild->mAffixData *= 2; + ConfBuild->AffixDataOffset = (uint32 *) repalloc(ConfBuild->AffixDataOffset, + sizeof(uint32) * ConfBuild->mAffixData); + } + + ConfBuild->AffixDataOffset[ConfBuild->nAffixData] = ConfBuild->AffixDataEnd; + StrNCpy(AffixDataGet(ConfBuild, ConfBuild->nAffixData), + AffixSet, AffixSetLen + 1); + + /* Save offset of the end of data */ + ConfBuild->AffixDataEnd += AffixSetLen + 1; + ConfBuild->nAffixData++; +} + +/* + * Allocate space for prefix tree node. + * + * ConfBuild: building structure for the current dictionary. + * array: NodeArray where to allocate new node. + * length: number of allocated NodeData. + * sizeNodeData: minimum size of each NodeData. + * sizeNodeHeader: size of header of new node. + * + * Returns an offset of new node in NodeArray->Nodes. 
+ */ +static uint32 +AllocateNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length, + uint32 sizeNodeData, uint32 sizeNodeHeader) +{ + uint32 node_offset; + uint32 size; + + size = sizeNodeHeader + length * sizeNodeData; + size = MAXALIGN(size); + + if (array->NodesSize == 0) + { + array->NodesSize = size * 32; /* Reserve space for next levels of the + * prefix tree */ + array->Nodes = (char *) tmpalloc(array->NodesSize); + array->NodesEnd = 0; + } + else if (array->NodesEnd + size >= array->NodesSize) + { + array->NodesSize = Max(array->NodesSize * 2, array->NodesSize + size); + array->Nodes = (char *) repalloc(array->Nodes, array->NodesSize); + } + + node_offset = array->NodesEnd; + array->NodesEnd += size; + + return node_offset; +} + +/* + * Allocate space for SPNode. + * + * Returns an offset of new node in ConfBuild->DictNodes->Nodes. + */ +static uint32 +AllocateSPNode(IspellDictBuild *ConfBuild, uint32 length) +{ + uint32 offset; + SPNode *node; + SPNodeData *data; + uint32 i; + + offset = AllocateNode(ConfBuild, &ConfBuild->DictNodes, length, + sizeof(SPNodeData), SPNHDRSZ); + node = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, offset); + node->length = length; + + /* + * Initialize all SPNodeData with default values. We cannot use memset() + * here because not all fields have 0 as default value. + */ + for (i = 0; i < length; i++) + { + data = &(node->data[i]); + data->val = 0; + data->affix = ISPELL_INVALID_INDEX; + data->compoundflag = 0; + data->isword = 0; + data->node_offset = ISPELL_INVALID_OFFSET; + } + + return offset; +} + +/* + * Allocate space for AffixNode. + * + * Returns an offset of new node in NodeArray->Nodes. 
+ */ +static uint32 +AllocateAffixNode(IspellDictBuild *ConfBuild, NodeArray *array, uint32 length) +{ + uint32 offset; + AffixNode *node; + AffixNodeData *data; + uint32 i; + + offset = AllocateNode(ConfBuild, array, length, sizeof(AffixNodeData), + ANHRDSZ); + node = (AffixNode *) NodeArrayGet(array, offset); + node->length = length; + node->isvoid = 0; + + /* + * Initialize all AffixNodeData with default values. We cannot use memset() + * here because not all fields have 0 as default value. + */ + for (i = 0; i < length; i++) + { + data = &(node->data[i]); + data->val = 0; + data->affstart = ISPELL_INVALID_INDEX; + data->affend = ISPELL_INVALID_INDEX; + data->node_offset = ISPELL_INVALID_OFFSET; + } + + return offset; } /* @@ -333,7 +541,7 @@ cmpaffix(const void *s1, const void *s2) * - 2 characters (FM_LONG). A character may be Unicode. * - numbers from 1 to 65000 (FM_NUM). * - * Depending on the flagMode an affix string can have the following format: + * Depending on the flagmode an affix string can have the following format: * - FM_CHAR: ABCD * Here we have 4 flags: A, B, C and D * - FM_LONG: ABCDE* @@ -341,13 +549,13 @@ cmpaffix(const void *s1, const void *s2) * - FM_NUM: 200,205,50 * Here we have 3 flags: 200, 205 and 50 * - * Conf: current dictionary. + * flagmode: flag mode of the dictionary * sflagset: the set of affix flags. Returns a reference to the start of a next * affix flag. * sflag: returns an affix flag from sflagset. */ static void -getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) +getNextFlagFromString(FlagMode flagmode, char **sflagset, char *sflag) { int32 s; char *next, @@ -356,11 +564,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) bool stop = false; bool met_comma = false; - maxstep = (Conf->flagMode == FM_LONG) ? 2 : 1; + maxstep = (flagmode == FM_LONG) ? 
2 : 1; while (**sflagset) { - switch (Conf->flagMode) + switch (flagmode) { case FM_LONG: case FM_CHAR: @@ -422,15 +630,15 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) stop = true; break; default: - elog(ERROR, "unrecognized type of Conf->flagMode: %d", - Conf->flagMode); + elog(ERROR, "unrecognized type of flagmode: %d", + flagmode); } if (stop) break; } - if (Conf->flagMode == FM_LONG && maxstep > 0) + if (flagmode == FM_LONG && maxstep > 0) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix flag \"%s\" with \"long\" flag value", @@ -440,31 +648,28 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) } /* - * Checks if the affix set Conf->AffixData[affix] contains affixflag. - * Conf->AffixData[affix] does not contain affixflag if this flag is not used - * actually by the .dict file. + * Checks if the affix set from AffixData contains affixflag. Affix set does + * not contain affixflag if this flag is not used actually by the .dict file. * - * Conf: current dictionary. - * affix: index of the Conf->AffixData array. + * flagmode: flag mode of the dictionary. + * sflagset: the set of affix flags. * affixflag: the affix flag. * - * Returns true if the string Conf->AffixData[affix] contains affixflag, - * otherwise returns false. + * Returns true if the affix set string contains affixflag, otherwise returns + * false. 
*/ static bool -IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag) +IsAffixFlagInUse(FlagMode flagmode, char *sflagset, const char *affixflag) { - char *flagcur; + char *flagcur = sflagset; char flag[BUFSIZ]; if (*affixflag == 0) return true; - flagcur = Conf->AffixData[affix]; - while (*flagcur) { - getNextFlagFromString(Conf, &flagcur, flag); + getNextFlagFromString(flagmode, &flagcur, flag); /* Compare first affix flag in flagcur with affixflag */ if (strcmp(flag, affixflag) == 0) return true; @@ -477,31 +682,33 @@ IsAffixFlagInUse(IspellDict *Conf, int affix, const char *affixflag) /* * Adds the new word into the temporary array Spell. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * word: new word. * flag: set of affix flags. Single flag can be get by getNextFlagFromString(). */ static void -NIAddSpell(IspellDict *Conf, const char *word, const char *flag) +NIAddSpell(IspellDictBuild *ConfBuild, const char *word, const char *flag) { - if (Conf->nspell >= Conf->mspell) + if (ConfBuild->nSpell >= ConfBuild->mSpell) { - if (Conf->mspell) + if (ConfBuild->mSpell) { - Conf->mspell *= 2; - Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *)); + ConfBuild->mSpell *= 2; + ConfBuild->Spell = (SPELL **) repalloc(ConfBuild->Spell, + ConfBuild->mSpell * sizeof(SPELL *)); } else { - Conf->mspell = 1024 * 20; - Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *)); + ConfBuild->mSpell = 1024 * 20; + ConfBuild->Spell = (SPELL **) tmpalloc(ConfBuild->mSpell * sizeof(SPELL *)); } } - Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); - strcpy(Conf->Spell[Conf->nspell]->word, word); - Conf->Spell[Conf->nspell]->p.flag = (*flag != '\0') + ConfBuild->Spell[ConfBuild->nSpell] = + (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); + strcpy(ConfBuild->Spell[ConfBuild->nSpell]->word, word); + ConfBuild->Spell[ConfBuild->nSpell]->p.flag = (*flag != '\0') ? 
tmpstrdup(flag) : VoidString; - Conf->nspell++; + ConfBuild->nSpell++; } /* @@ -509,11 +716,11 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag) * * Note caller must already have applied get_tsearch_config_filename. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * filename: path to the .dict file. */ void -NIImportDictionary(IspellDict *Conf, const char *filename) +NIImportDictionary(IspellDictBuild *ConfBuild, const char *filename) { tsearch_readline_state trst; char *line; @@ -564,9 +771,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename) } s += pg_mblen(s); } - pstr = lowerstr_ctx(Conf, line); + pstr = lowerstr_ctx(ConfBuild, line); - NIAddSpell(Conf, pstr, flag); + NIAddSpell(ConfBuild, pstr, flag); pfree(pstr); pfree(line); @@ -590,7 +797,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename) * SFX M 0 's . * is presented here. * - * Conf: current dictionary. + * dict: current dictionary. * word: basic form of word. * affixflag: affix flag, by which a basic form of word was generated. * flag: compound flag used to compare with StopMiddle->compoundflag. @@ -598,9 +805,9 @@ NIImportDictionary(IspellDict *Conf, const char *filename) * Returns 1 if the word was found in the prefix tree, else returns 0. */ static int -FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) +FindWord(IspellDictData *dict, const char *word, const char *affixflag, int flag) { - SPNode *node = Conf->Dictionary; + SPNode *node = (SPNode *) DictDictNodes(dict); SPNodeData *StopLow, *StopHigh, *StopMiddle; @@ -636,10 +843,14 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) * Check if this affix rule is presented in the affix set * with index StopMiddle->affix. 
*/ - if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag)) + if (IsAffixFlagInUse(dict->flagMode, + DictAffixDataGet(dict, StopMiddle->affix), + affixflag)) return 1; } - node = StopMiddle->node; + /* Retrieve SPNode by the offset */ + node = (SPNode *) DictNodeGet(DictDictNodes(dict), + StopMiddle->node_offset); ptr++; break; } @@ -657,7 +868,8 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) /* * Adds a new affix rule to the Affix field. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary, used to allocate + * temporary data. * flag: affix flag ('\' in the below example). * flagflags: set of flags from the flagval field for this affix rule. This set * is listed after '/' character in the added string (repl). @@ -673,26 +885,54 @@ FindWord(IspellDict *Conf, const char *word, const char *affixflag, int flag) * type: FF_SUFFIX or FF_PREFIX. */ static void -NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, - const char *find, const char *repl, int type) +NIAddAffix(IspellDictBuild *ConfBuild, const char *flag, char flagflags, + const char *mask, const char *find, const char *repl, int type) { AFFIX *Affix; + uint32 size; + uint32 flaglen = strlen(flag), + findlen = strlen(find), + repllen = strlen(repl), + masklen = strlen(mask); + + /* Sanity checks */ + if (flaglen > AF_FLAG_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix flag \"%s\" too long", flag))); + if (findlen > AF_REPL_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix find field \"%s\" too long", find))); + if (repllen > AF_REPL_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix repl field \"%s\" too long", repl))); + if (masklen > AF_REPL_MAXSIZE) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("affix mask field \"%s\" too long", mask))); - if (Conf->naffixes >= Conf->maffixes) + if 
(ConfBuild->nAffix >= ConfBuild->mAffix) { - if (Conf->maffixes) + if (ConfBuild->mAffix) { - Conf->maffixes *= 2; - Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX)); + ConfBuild->mAffix *= 2; + ConfBuild->Affix = (AFFIX **) repalloc(ConfBuild->Affix, + ConfBuild->mAffix * sizeof(AFFIX *)); } else { - Conf->maffixes = 16; - Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX)); + ConfBuild->mAffix = 255; + ConfBuild->Affix = (AFFIX **) tmpalloc(ConfBuild->mAffix * sizeof(AFFIX *)); } } - Affix = Conf->Affix + Conf->naffixes; + size = AFFIXHDRSZ + flaglen + 1 /* \0 */ + findlen + 1 /* \0 */ + + repllen + 1 /* \0 */ + masklen + 1 /* \0 */; + + Affix = (AFFIX *) tmpalloc(size); + ConfBuild->Affix[ConfBuild->nAffix] = Affix; /* This affix rule can be applied for words with any ending */ if (strcmp(mask, ".") == 0 || *mask == '\0') @@ -705,42 +945,12 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, { Affix->issimple = 0; Affix->isregis = 1; - RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX), - *mask ? 
mask : VoidString); } /* This affix rule will use regex_t to search word ending */ else { - int masklen; - int wmasklen; - int err; - pg_wchar *wmask; - char *tmask; - Affix->issimple = 0; Affix->isregis = 0; - tmask = (char *) tmpalloc(strlen(mask) + 3); - if (type == FF_SUFFIX) - sprintf(tmask, "%s$", mask); - else - sprintf(tmask, "^%s", mask); - - masklen = strlen(tmask); - wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar)); - wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen); - - err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen, - REG_ADVANCED | REG_NOSUB, - DEFAULT_COLLATION_OID); - if (err) - { - char errstr[100]; - - pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr)); - ereport(ERROR, - (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), - errmsg("invalid regular expression: %s", errstr))); - } } Affix->flagflags = flagflags; @@ -749,15 +959,22 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask, if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0) Affix->flagflags |= FF_COMPOUNDFLAG; } - Affix->flag = cpstrdup(Conf, flag); + Affix->type = type; - Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString; - if ((Affix->replen = strlen(repl)) > 0) - Affix->repl = cpstrdup(Conf, repl); - else - Affix->repl = VoidString; - Conf->naffixes++; + Affix->replen = repllen; + StrNCpy(AffixFieldRepl(Affix), repl, repllen + 1); + + Affix->findlen = findlen; + StrNCpy(AffixFieldFind(Affix), find, findlen + 1); + + Affix->masklen = masklen; + StrNCpy(AffixFieldMask(Affix), mask, masklen + 1); + + StrNCpy(AffixFieldFlag(Affix), flag, flaglen + 1); + + ConfBuild->nAffix++; + ConfBuild->AffixSize += size; } /* Parsing states for parse_affentry() and friends */ @@ -1021,10 +1238,10 @@ parse_affentry(char *str, char *mask, char *find, char *repl) * Sets a Hunspell options depending on flag type. 
*/ static void -setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, +setCompoundAffixFlagValue(IspellDictBuild *ConfBuild, CompoundAffixFlag *entry, char *s, uint32 val) { - if (Conf->flagMode == FM_NUM) + if (ConfBuild->dict->flagMode == FM_NUM) { char *next; int i; @@ -1044,19 +1261,19 @@ setCompoundAffixFlagValue(IspellDict *Conf, CompoundAffixFlag *entry, else entry->flag.s = tmpstrdup(s); - entry->flagMode = Conf->flagMode; + entry->flagMode = ConfBuild->dict->flagMode; entry->value = val; } /* * Sets up a correspondence for the affix parameter with the affix flag. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * s: affix flag in string. * val: affix parameter. */ static void -addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) +addCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s, uint32 val) { CompoundAffixFlag *newValue; char sbuf[BUFSIZ]; @@ -1083,29 +1300,29 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) *sflag = '\0'; /* Resize array or allocate memory for array CompoundAffixFlag */ - if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag) + if (ConfBuild->nCompoundAffixFlag >= ConfBuild->mCompoundAffixFlag) { - if (Conf->mCompoundAffixFlag) + if (ConfBuild->mCompoundAffixFlag) { - Conf->mCompoundAffixFlag *= 2; - Conf->CompoundAffixFlags = (CompoundAffixFlag *) - repalloc((void *) Conf->CompoundAffixFlags, - Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); + ConfBuild->mCompoundAffixFlag *= 2; + ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *) + repalloc((void *) ConfBuild->CompoundAffixFlags, + ConfBuild->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); } else { - Conf->mCompoundAffixFlag = 10; - Conf->CompoundAffixFlags = (CompoundAffixFlag *) - tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); + ConfBuild->mCompoundAffixFlag = 10; + ConfBuild->CompoundAffixFlags = (CompoundAffixFlag *) + 
tmpalloc(ConfBuild->mCompoundAffixFlag * sizeof(CompoundAffixFlag)); } } - newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag; + newValue = ConfBuild->CompoundAffixFlags + ConfBuild->nCompoundAffixFlag; - setCompoundAffixFlagValue(Conf, newValue, sbuf, val); + setCompoundAffixFlagValue(ConfBuild, newValue, sbuf, val); - Conf->usecompound = true; - Conf->nCompoundAffixFlag++; + ConfBuild->dict->usecompound = true; + ConfBuild->nCompoundAffixFlag++; } /* @@ -1113,7 +1330,7 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) * flags s. */ static int -getCompoundAffixFlagValue(IspellDict *Conf, char *s) +getCompoundAffixFlagValue(IspellDictBuild *ConfBuild, char *s) { uint32 flag = 0; CompoundAffixFlag *found, @@ -1121,18 +1338,18 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s) char sflag[BUFSIZ]; char *flagcur; - if (Conf->nCompoundAffixFlag == 0) + if (ConfBuild->nCompoundAffixFlag == 0) return 0; flagcur = s; while (*flagcur) { - getNextFlagFromString(Conf, &flagcur, sflag); - setCompoundAffixFlagValue(Conf, &key, sflag, 0); + getNextFlagFromString(ConfBuild->dict->flagMode, &flagcur, sflag); + setCompoundAffixFlagValue(ConfBuild, &key, sflag, 0); found = (CompoundAffixFlag *) - bsearch(&key, (void *) Conf->CompoundAffixFlags, - Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag), + bsearch(&key, (void *) ConfBuild->CompoundAffixFlags, + ConfBuild->nCompoundAffixFlag, sizeof(CompoundAffixFlag), cmpcmdflag); if (found != NULL) flag |= found->value; @@ -1144,14 +1361,13 @@ getCompoundAffixFlagValue(IspellDict *Conf, char *s) /* * Returns a flag set using the s parameter. * - * If Conf->useFlagAliases is true then the s parameter is index of the - * Conf->AffixData array and function returns its entry. - * Else function returns the s parameter. + * If useFlagAliases is true then the s parameter is index of the AffixData + * array and function returns its entry. Else function returns the s parameter. 
*/ static char * -getAffixFlagSet(IspellDict *Conf, char *s) +getAffixFlagSet(IspellDictBuild *ConfBuild, char *s) { - if (Conf->useFlagAliases && *s != '\0') + if (ConfBuild->dict->useFlagAliases && *s != '\0') { int curaffix; char *end; @@ -1162,13 +1378,13 @@ getAffixFlagSet(IspellDict *Conf, char *s) (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", s))); - if (curaffix > 0 && curaffix <= Conf->nAffixData) + if (curaffix > 0 && curaffix <= ConfBuild->nAffixData) /* * Do not subtract 1 from curaffix because empty string was added * in NIImportOOAffixes */ - return Conf->AffixData[curaffix]; + return AffixDataGet(ConfBuild, curaffix); else return VoidString; } @@ -1179,11 +1395,11 @@ getAffixFlagSet(IspellDict *Conf, char *s) /* * Import an affix file that follows MySpell or Hunspell format. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * filename: path to the .affix file. */ static void -NIImportOOAffixes(IspellDict *Conf, const char *filename) +NIImportOOAffixes(IspellDictBuild *ConfBuild, const char *filename) { char type[BUFSIZ], *ptype = NULL; @@ -1195,17 +1411,16 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) char repl[BUFSIZ], *prepl; bool isSuffix = false; - int naffix = 0, - curaffix = 0; + int naffix = 0; int sflaglen = 0; char flagflags = 0; tsearch_readline_state trst; char *recoded; /* read file to find any flag */ - Conf->usecompound = false; - Conf->useFlagAliases = false; - Conf->flagMode = FM_CHAR; + ConfBuild->dict->usecompound = false; + ConfBuild->dict->useFlagAliases = false; + ConfBuild->dict->flagMode = FM_CHAR; if (!tsearch_readline_begin(&trst, filename)) ereport(ERROR, @@ -1222,30 +1437,36 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) } if (STRNCMP(recoded, "COMPOUNDFLAG") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDFLAG"), FF_COMPOUNDFLAG); 
else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDBEGIN"), FF_COMPOUNDBEGIN); else if (STRNCMP(recoded, "COMPOUNDLAST") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDLAST"), FF_COMPOUNDLAST); /* COMPOUNDLAST and COMPOUNDEND are synonyms */ else if (STRNCMP(recoded, "COMPOUNDEND") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDEND"), FF_COMPOUNDLAST); else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("COMPOUNDMIDDLE"), FF_COMPOUNDMIDDLE); else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0) - addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"), + addCompoundAffixFlagValue(ConfBuild, + recoded + strlen("ONLYINCOMPOUND"), FF_COMPOUNDONLY); else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0) - addCompoundAffixFlagValue(Conf, + addCompoundAffixFlagValue(ConfBuild, recoded + strlen("COMPOUNDPERMITFLAG"), FF_COMPOUNDPERMITFLAG); else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0) - addCompoundAffixFlagValue(Conf, + addCompoundAffixFlagValue(ConfBuild, recoded + strlen("COMPOUNDFORBIDFLAG"), FF_COMPOUNDFORBIDFLAG); else if (STRNCMP(recoded, "FLAG") == 0) @@ -1258,9 +1479,9 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) if (*s) { if (STRNCMP(s, "long") == 0) - Conf->flagMode = FM_LONG; + ConfBuild->dict->flagMode = FM_LONG; else if (STRNCMP(s, "num") == 0) - Conf->flagMode = FM_NUM; + ConfBuild->dict->flagMode = FM_NUM; else if (STRNCMP(s, "default") != 0) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), @@ -1274,8 +1495,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) } 
tsearch_readline_end(&trst); - if (Conf->nCompoundAffixFlag > 1) - qsort((void *) Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag, + if (ConfBuild->nCompoundAffixFlag > 1) + qsort((void *) ConfBuild->CompoundAffixFlags, ConfBuild->nCompoundAffixFlag, sizeof(CompoundAffixFlag), cmpcmdflag); if (!tsearch_readline_begin(&trst, filename)) @@ -1295,15 +1516,15 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) if (ptype) pfree(ptype); - ptype = lowerstr_ctx(Conf, type); + ptype = lowerstr_ctx(ConfBuild, type); /* First try to parse AF parameter (alias compression) */ if (STRNCMP(ptype, "af") == 0) { /* First line is the number of aliases */ - if (!Conf->useFlagAliases) + if (!ConfBuild->dict->useFlagAliases) { - Conf->useFlagAliases = true; + ConfBuild->dict->useFlagAliases = true; naffix = atoi(sflag); if (naffix == 0) ereport(ERROR, @@ -1313,21 +1534,15 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) /* Also reserve place for empty flag set */ naffix++; - Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); - Conf->lenAffixData = Conf->nAffixData = naffix; + InitAffixData(ConfBuild, naffix); /* Add empty flag set into AffixData */ - Conf->AffixData[curaffix] = VoidString; - curaffix++; + AddAffixSet(ConfBuild, VoidString, 0); } /* Other lines is aliases */ else { - if (curaffix < naffix) - { - Conf->AffixData[curaffix] = cpstrdup(Conf, sflag); - curaffix++; - } + AddAffixSet(ConfBuild, sflag, strlen(sflag)); } goto nextline; } @@ -1338,8 +1553,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) sflaglen = strlen(sflag); if (sflaglen == 0 - || (sflaglen > 1 && Conf->flagMode == FM_CHAR) - || (sflaglen > 2 && Conf->flagMode == FM_LONG)) + || (sflaglen > 1 && ConfBuild->dict->flagMode == FM_CHAR) + || (sflaglen > 2 && ConfBuild->dict->flagMode == FM_LONG)) goto nextline; /*-------- @@ -1367,21 +1582,21 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) /* Get flags after '/' (flags are case sensitive) */ if ((ptr = 
strchr(repl, '/')) != NULL) - aflg |= getCompoundAffixFlagValue(Conf, - getAffixFlagSet(Conf, + aflg |= getCompoundAffixFlagValue(ConfBuild, + getAffixFlagSet(ConfBuild, ptr + 1)); /* Get lowercased version of string before '/' */ - prepl = lowerstr_ctx(Conf, repl); + prepl = lowerstr_ctx(ConfBuild, repl); if ((ptr = strchr(prepl, '/')) != NULL) *ptr = '\0'; - pfind = lowerstr_ctx(Conf, find); - pmask = lowerstr_ctx(Conf, mask); + pfind = lowerstr_ctx(ConfBuild, find); + pmask = lowerstr_ctx(ConfBuild, mask); if (t_iseq(find, '0')) *pfind = '\0'; if (t_iseq(repl, '0')) *prepl = '\0'; - NIAddAffix(Conf, sflag, flagflags | aflg, pmask, pfind, prepl, + NIAddAffix(ConfBuild, sflag, flagflags | aflg, pmask, pfind, prepl, isSuffix ? FF_SUFFIX : FF_PREFIX); pfree(prepl); pfree(pfind); @@ -1407,7 +1622,7 @@ nextline: * work to NIImportOOAffixes(), which will re-read the whole file. */ void -NIImportAffixes(IspellDict *Conf, const char *filename) +NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename) { char *pstr = NULL; char flag[BUFSIZ]; @@ -1428,9 +1643,9 @@ NIImportAffixes(IspellDict *Conf, const char *filename) errmsg("could not open affix file \"%s\": %m", filename))); - Conf->usecompound = false; - Conf->useFlagAliases = false; - Conf->flagMode = FM_CHAR; + ConfBuild->dict->usecompound = false; + ConfBuild->dict->useFlagAliases = false; + ConfBuild->dict->flagMode = FM_CHAR; while ((recoded = tsearch_readline(&trst)) != NULL) { @@ -1452,10 +1667,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s += pg_mblen(s); if (*s && pg_mblen(s) == 1) - { - addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG); - Conf->usecompound = true; - } + addCompoundAffixFlagValue(ConfBuild, s, FF_COMPOUNDFLAG); + oldformat = true; goto nextline; } @@ -1528,7 +1741,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename) if (!parse_affentry(pstr, mask, find, repl)) goto nextline; - NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? 
FF_SUFFIX : FF_PREFIX); + NIAddAffix(ConfBuild, flag, flagflags, mask, find, repl, + suffixes ? FF_SUFFIX : FF_PREFIX); nextline: pfree(recoded); @@ -1547,53 +1761,48 @@ isnewformat: errmsg("affix file contains both old-style and new-style commands"))); tsearch_readline_end(&trst); - NIImportOOAffixes(Conf, filename); + NIImportOOAffixes(ConfBuild, filename); } /* * Merges two affix flag sets and stores a new affix flag set into - * Conf->AffixData. + * ConfBuild->AffixData. * * Returns index of a new affix flag set. */ static int -MergeAffix(IspellDict *Conf, int a1, int a2) +MergeAffix(IspellDictBuild *ConfBuild, int a1, int a2) { - char **ptr; + char *ptr; + uint32 len; /* Do not merge affix flags if one of affix flags is empty */ - if (*Conf->AffixData[a1] == '\0') + if (*AffixDataGet(ConfBuild, a1) == '\0') return a2; - else if (*Conf->AffixData[a2] == '\0') + else if (*AffixDataGet(ConfBuild, a2) == '\0') return a1; - while (Conf->nAffixData + 1 >= Conf->lenAffixData) - { - Conf->lenAffixData *= 2; - Conf->AffixData = (char **) repalloc(Conf->AffixData, - sizeof(char *) * Conf->lenAffixData); - } - - ptr = Conf->AffixData + Conf->nAffixData; - if (Conf->flagMode == FM_NUM) + if (ConfBuild->dict->flagMode == FM_NUM) { - *ptr = cpalloc(strlen(Conf->AffixData[a1]) + - strlen(Conf->AffixData[a2]) + - 1 /* comma */ + 1 /* \0 */ ); - sprintf(*ptr, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]); + len = strlen(AffixDataGet(ConfBuild, a1)) + 1 /* comma */ + + strlen(AffixDataGet(ConfBuild, a2)); + ptr = tmpalloc(len + 1 /* \0 */); + sprintf(ptr, "%s,%s", AffixDataGet(ConfBuild, a1), + AffixDataGet(ConfBuild, a2)); } else { - *ptr = cpalloc(strlen(Conf->AffixData[a1]) + - strlen(Conf->AffixData[a2]) + - 1 /* \0 */ ); - sprintf(*ptr, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]); + len = strlen(AffixDataGet(ConfBuild, a1)) + + strlen(AffixDataGet(ConfBuild, a2)); + ptr = tmpalloc(len + 1 /* \0 */ ); + sprintf(ptr, "%s%s", AffixDataGet(ConfBuild, a1), + 
AffixDataGet(ConfBuild, a2)); } - ptr++; - *ptr = NULL; - Conf->nAffixData++; - return Conf->nAffixData - 1; + AddAffixSet(ConfBuild, ptr, len); + pfree(ptr); + + return ConfBuild->nAffixData - 1; } /* @@ -1601,66 +1810,87 @@ MergeAffix(IspellDict *Conf, int a1, int a2) * flags with the given index. */ static uint32 -makeCompoundFlags(IspellDict *Conf, int affix) +makeCompoundFlags(IspellDictBuild *ConfBuild, int affix) { - char *str = Conf->AffixData[affix]; + char *str = AffixDataGet(ConfBuild, affix); - return (getCompoundAffixFlagValue(Conf, str) & FF_COMPOUNDFLAGMASK); + return (getCompoundAffixFlagValue(ConfBuild, str) & FF_COMPOUNDFLAGMASK); } /* * Makes a prefix tree for the given level. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * low: lower index of the Conf->Spell array. * high: upper index of the Conf->Spell array. * level: current prefix tree level. + * + * Returns an offset of SPNode in DictNodes. */ -static SPNode * -mkSPNode(IspellDict *Conf, int low, int high, int level) +static uint32 +mkSPNode(IspellDictBuild *ConfBuild, int low, int high, int level) { int i; int nchar = 0; char lastchar = '\0'; + uint32 rs_offset, + new_offset; SPNode *rs; SPNodeData *data; + int data_index = 0; int lownew = low; for (i = low; i < high; i++) - if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level]) + if (ConfBuild->Spell[i]->p.d.len > level && + lastchar != ConfBuild->Spell[i]->word[level]) { nchar++; - lastchar = Conf->Spell[i]->word[level]; + lastchar = ConfBuild->Spell[i]->word[level]; } if (!nchar) - return NULL; + return ISPELL_INVALID_OFFSET; - rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData)); - rs->length = nchar; + rs_offset = AllocateSPNode(ConfBuild, nchar); + rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset); data = rs->data; lastchar = '\0'; for (i = low; i < high; i++) - if (Conf->Spell[i]->p.d.len > level) + if (ConfBuild->Spell[i]->p.d.len > 
level) { - if (lastchar != Conf->Spell[i]->word[level]) + if (lastchar != ConfBuild->Spell[i]->word[level]) { if (lastchar) { /* Next level of the prefix tree */ - data->node = mkSPNode(Conf, lownew, i, level + 1); + new_offset = mkSPNode(ConfBuild, lownew, i, level + 1); + + /* + * ConfBuild->DictNodes can be repalloc'ed within + * mkSPNode(), so reinitialize pointers. + */ + rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset); + + /* First save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; + + /* Work with next node */ + data_index++; + Assert(data_index < nchar); + data = &(rs->data[data_index]); + lownew = i; - data++; } - lastchar = Conf->Spell[i]->word[level]; + lastchar = ConfBuild->Spell[i]->word[level]; } - data->val = ((uint8 *) (Conf->Spell[i]->word))[level]; - if (Conf->Spell[i]->p.d.len == level + 1) + data->val = ((uint8 *) (ConfBuild->Spell[i]->word))[level]; + if (ConfBuild->Spell[i]->p.d.len == level + 1) { bool clearCompoundOnly = false; - if (data->isword && data->affix != Conf->Spell[i]->p.d.affix) + if (data->isword && data->affix != ConfBuild->Spell[i]->p.d.affix) { /* * MergeAffix called a few times. If one of word is @@ -1669,15 +1899,17 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) */ clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag - & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix)) + & makeCompoundFlags(ConfBuild, + ConfBuild->Spell[i]->p.d.affix)) ? 
false : true; - data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix); + data->affix = MergeAffix(ConfBuild, data->affix, + ConfBuild->Spell[i]->p.d.affix); } else - data->affix = Conf->Spell[i]->p.d.affix; + data->affix = ConfBuild->Spell[i]->p.d.affix; data->isword = 1; - data->compoundflag = makeCompoundFlags(Conf, data->affix); + data->compoundflag = makeCompoundFlags(ConfBuild, data->affix); if ((data->compoundflag & FF_COMPOUNDONLY) && (data->compoundflag & FF_COMPOUNDFLAG) == 0) @@ -1689,9 +1921,19 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) } /* Next level of the prefix tree */ - data->node = mkSPNode(Conf, lownew, high, level + 1); + new_offset = mkSPNode(ConfBuild, lownew, high, level + 1); + + /* + * ConfBuild->DictNodes can be repalloc'ed within mkSPNode(), so + * reinitialize pointers. + */ + rs = (SPNode *) NodeArrayGet(&ConfBuild->DictNodes, rs_offset); - return rs; + /* Save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; + + return rs_offset; } /* @@ -1699,7 +1941,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) * and affixes. */ void -NISortDictionary(IspellDict *Conf) +NISortDictionary(IspellDictBuild *ConfBuild) { int i; int naffix = 0; @@ -1708,81 +1950,81 @@ NISortDictionary(IspellDict *Conf) /* compress affixes */ /* - * If we use flag aliases then we need to use Conf->AffixData filled in + * If we use flag aliases then we need to use ConfBuild->AffixData filled in * the NIImportOOAffixes(). 
*/ - if (Conf->useFlagAliases) + if (ConfBuild->dict->useFlagAliases) { - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { char *end; - if (*Conf->Spell[i]->p.flag != '\0') + if (*ConfBuild->Spell[i]->p.flag != '\0') { - curaffix = strtol(Conf->Spell[i]->p.flag, &end, 10); - if (Conf->Spell[i]->p.flag == end || errno == ERANGE) + curaffix = strtol(ConfBuild->Spell[i]->p.flag, &end, 10); + if (ConfBuild->Spell[i]->p.flag == end || errno == ERANGE) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", - Conf->Spell[i]->p.flag))); + ConfBuild->Spell[i]->p.flag))); } else { /* - * If Conf->Spell[i]->p.flag is empty, then get empty value of - * Conf->AffixData (0 index). + * If ConfBuild->Spell[i]->p.flag is empty, then get empty + * value of ConfBuild->AffixData (0 index). */ curaffix = 0; } - Conf->Spell[i]->p.d.affix = curaffix; - Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); + ConfBuild->Spell[i]->p.d.affix = curaffix; + ConfBuild->Spell[i]->p.d.len = strlen(ConfBuild->Spell[i]->word); } } - /* Otherwise fill Conf->AffixData here */ + /* Otherwise fill ConfBuild->AffixData here */ else { /* Count the number of different flags used in the dictionary */ - qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), + qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *), cmpspellaffix); naffix = 0; - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { if (i == 0 - || strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag)) + || strcmp(ConfBuild->Spell[i]->p.flag, + ConfBuild->Spell[i - 1]->p.flag)) naffix++; } /* - * Fill in Conf->AffixData with the affixes that were used in the - * dictionary. Replace textual flag-field of Conf->Spell entries with - * indexes into Conf->AffixData array. + * Fill in AffixData with the affixes that were used in the + * dictionary. 
Replace textual flag-field of ConfBuild->Spell entries + * with indexes into ConfBuild->AffixData array. */ - Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); + InitAffixData(ConfBuild, naffix); curaffix = -1; - for (i = 0; i < Conf->nspell; i++) + for (i = 0; i < ConfBuild->nSpell; i++) { if (i == 0 - || strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix])) + || strcmp(ConfBuild->Spell[i]->p.flag, + AffixDataGet(ConfBuild, curaffix))) { curaffix++; Assert(curaffix < naffix); - Conf->AffixData[curaffix] = cpstrdup(Conf, - Conf->Spell[i]->p.flag); + AddAffixSet(ConfBuild, ConfBuild->Spell[i]->p.flag, + strlen(ConfBuild->Spell[i]->p.flag)); } - Conf->Spell[i]->p.d.affix = curaffix; - Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); + ConfBuild->Spell[i]->p.d.affix = curaffix; + ConfBuild->Spell[i]->p.d.len = strlen(ConfBuild->Spell[i]->word); } - - Conf->lenAffixData = Conf->nAffixData = naffix; } /* Start build a prefix tree */ - qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell); - Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0); + qsort((void *) ConfBuild->Spell, ConfBuild->nSpell, sizeof(SPELL *), cmpspell); + mkSPNode(ConfBuild, 0, ConfBuild->nSpell, 0); } /* @@ -1790,83 +2032,104 @@ NISortDictionary(IspellDict *Conf) * rule. Affixes with empty replace string do not include in the prefix tree. * This affixes are included by mkVoidAffix(). * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * low: lower index of the Conf->Affix array. * high: upper index of the Conf->Affix array. * level: current prefix tree level. * type: FF_SUFFIX or FF_PREFIX. + * + * Returns an offset in nodes array. 
*/ -static AffixNode * -mkANode(IspellDict *Conf, int low, int high, int level, int type) +static uint32 +mkANode(IspellDictBuild *ConfBuild, int low, int high, int level, int type) { int i; int nchar = 0; uint8 lastchar = '\0'; + NodeArray *array; + uint32 rs_offset, + new_offset; AffixNode *rs; AffixNodeData *data; + int data_index = 0; int lownew = low; - int naff; - AFFIX **aff; for (i = low; i < high; i++) - if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type)) + if (ConfBuild->Affix[i]->replen > level && + lastchar != GETCHAR(ConfBuild->Affix[i], level, type)) { nchar++; - lastchar = GETCHAR(Conf->Affix + i, level, type); + lastchar = GETCHAR(ConfBuild->Affix[i], level, type); } if (!nchar) - return NULL; + return ISPELL_INVALID_OFFSET; - aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1)); - naff = 0; + if (type == FF_SUFFIX) + array = &ConfBuild->SuffixNodes; + else + array = &ConfBuild->PrefixNodes; - rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData)); - rs->length = nchar; - data = rs->data; + rs_offset = AllocateAffixNode(ConfBuild, array, nchar); + rs = (AffixNode *) NodeArrayGet(array, rs_offset); + data = (AffixNodeData *) rs->data; lastchar = '\0'; for (i = low; i < high; i++) - if (Conf->Affix[i].replen > level) + if (ConfBuild->Affix[i]->replen > level) { - if (lastchar != GETCHAR(Conf->Affix + i, level, type)) + if (lastchar != GETCHAR(ConfBuild->Affix[i], level, type)) { if (lastchar) { /* Next level of the prefix tree */ - data->node = mkANode(Conf, lownew, i, level + 1, type); - if (naff) - { - data->naff = naff; - data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff); - memcpy(data->aff, aff, sizeof(AFFIX *) * naff); - naff = 0; - } - data++; + new_offset = mkANode(ConfBuild, lownew, i, level + 1, type); + + /* + * array can be repalloc'ed within mkANode(), so + * reinitialize pointers. 
+ */ + rs = (AffixNode *) NodeArrayGet(array, rs_offset); + + /* First save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; + + /* Handle next data node */ + data_index++; + Assert(data_index < nchar); + data = &(rs->data[data_index]); + lownew = i; } - lastchar = GETCHAR(Conf->Affix + i, level, type); + lastchar = GETCHAR(ConfBuild->Affix[i], level, type); } - data->val = GETCHAR(Conf->Affix + i, level, type); - if (Conf->Affix[i].replen == level + 1) + data->val = GETCHAR(ConfBuild->Affix[i], level, type); + if (ConfBuild->Affix[i]->replen == level + 1) { /* affix stopped */ - aff[naff++] = Conf->Affix + i; + if (data->affstart == ISPELL_INVALID_INDEX) + { + data->affstart = i; + data->affend = i; + } + else + data->affend = i; } } /* Next level of the prefix tree */ - data->node = mkANode(Conf, lownew, high, level + 1, type); - if (naff) - { - data->naff = naff; - data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff); - memcpy(data->aff, aff, sizeof(AFFIX *) * naff); - naff = 0; - } + new_offset = mkANode(ConfBuild, lownew, high, level + 1, type); + + /* + * array can be repalloc'ed within mkANode(), so reinitialize pointers. + */ + rs = (AffixNode *) NodeArrayGet(array, rs_offset); - pfree(aff); + /* Save offset of the new node */ + data = &(rs->data[data_index]); + data->node_offset = new_offset; - return rs; + return rs_offset; } /* @@ -1874,137 +2137,154 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) * for affixes which have empty replace string ("repl" field). */ static void -mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix) +mkVoidAffix(IspellDictBuild *ConfBuild, bool issuffix, int startsuffix) { - int i, - cnt = 0; + int i; int start = (issuffix) ? startsuffix : 0; - int end = (issuffix) ? Conf->naffixes : startsuffix; - AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData)); - - Affix->length = 1; - Affix->isvoid = 1; + int end = (issuffix) ? 
ConfBuild->nAffix : startsuffix; + uint32 node_offset; + NodeArray *array; + AffixNode *Affix; + AffixNodeData *AffixData; if (issuffix) - { - Affix->data->node = Conf->Suffix; - Conf->Suffix = Affix; - } + array = &ConfBuild->SuffixNodes; else - { - Affix->data->node = Conf->Prefix; - Conf->Prefix = Affix; - } + array = &ConfBuild->PrefixNodes; - /* Count affixes with empty replace string */ - for (i = start; i < end; i++) - if (Conf->Affix[i].replen == 0) - cnt++; - - /* There is not affixes with empty replace string */ - if (cnt == 0) - return; + node_offset = AllocateAffixNode(ConfBuild, array, 1); + Affix = (AffixNode *) NodeArrayGet(array, node_offset); - Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt); - Affix->data->naff = (uint32) cnt; + Affix->isvoid = 1; + AffixData = (AffixNodeData *) Affix->data; - cnt = 0; for (i = start; i < end; i++) - if (Conf->Affix[i].replen == 0) + if (ConfBuild->Affix[i]->replen == 0) { - Affix->data->aff[cnt] = Conf->Affix + i; - cnt++; + if (AffixData->affstart == ISPELL_INVALID_INDEX) + { + AffixData->affstart = i; + AffixData->affend = i; + } + else + AffixData->affend = i; } } /* - * Checks if the affixflag is used by dictionary. Conf->AffixData does not + * Checks if the affixflag is used by dictionary. AffixData does not * contain affixflag if this flag is not used actually by the .dict file. * - * Conf: current dictionary. + * ConfBuild: building structure for the current dictionary. * affixflag: affix flag. * - * Returns true if the Conf->AffixData array contains affixflag, otherwise + * Returns true if the ConfBuild->AffixData array contains affixflag, otherwise * returns false. 
*/ static bool -isAffixInUse(IspellDict *Conf, char *affixflag) +isAffixInUse(IspellDictBuild *ConfBuild, char *affixflag) { int i; - for (i = 0; i < Conf->nAffixData; i++) - if (IsAffixFlagInUse(Conf, i, affixflag)) + for (i = 0; i < ConfBuild->nAffixData; i++) + if (IsAffixFlagInUse(ConfBuild->dict->flagMode, + AffixDataGet(ConfBuild, i), affixflag)) return true; return false; } /* - * Builds Conf->Prefix and Conf->Suffix trees from the imported affixes. + * Builds Prefix and Suffix trees from the imported affixes. */ void -NISortAffixes(IspellDict *Conf) +NISortAffixes(IspellDictBuild *ConfBuild) { AFFIX *Affix; + AffixNode *voidPrefix, + *voidSuffix; size_t i; CMPDAffix *ptr; - int firstsuffix = Conf->naffixes; + int firstsuffix = ConfBuild->nAffix; + uint32 prefix_offset, + suffix_offset; - if (Conf->naffixes == 0) + if (ConfBuild->nAffix == 0) return; /* Store compound affixes in the Conf->CompoundAffix array */ - if (Conf->naffixes > 1) - qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix); - Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes); - ptr->affix = NULL; - - for (i = 0; i < Conf->naffixes; i++) + if (ConfBuild->nAffix > 1) + qsort((void *) ConfBuild->Affix, ConfBuild->nAffix, + sizeof(AFFIX *), cmpaffix); + ConfBuild->nCompoundAffix = ConfBuild->nAffix; + ConfBuild->CompoundAffix = ptr = + (CMPDAffix *) tmpalloc(sizeof(CMPDAffix) * ConfBuild->nCompoundAffix); + ptr->affix = ISPELL_INVALID_INDEX; + + for (i = 0; i < ConfBuild->nAffix; i++) { - Affix = &(((AFFIX *) Conf->Affix)[i]); + Affix = ConfBuild->Affix[i]; if (Affix->type == FF_SUFFIX && i < firstsuffix) firstsuffix = i; if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 && - isAffixInUse(Conf, Affix->flag)) + isAffixInUse(ConfBuild, AffixFieldFlag(Affix))) { - if (ptr == Conf->CompoundAffix || + if (ptr == ConfBuild->CompoundAffix || ptr->issuffix != (ptr - 1)->issuffix || - strbncmp((const unsigned char *) (ptr - 1)->affix, - 
(const unsigned char *) Affix->repl, + strbncmp((const unsigned char *) AffixFieldRepl(ConfBuild->Affix[(ptr - 1)->affix]), + (const unsigned char *) AffixFieldRepl(Affix), (ptr - 1)->len)) { /* leave only unique and minimals suffixes */ - ptr->affix = Affix->repl; + ptr->affix = i; ptr->len = Affix->replen; ptr->issuffix = (Affix->type == FF_SUFFIX); ptr++; } } } - ptr->affix = NULL; - Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1)); + ptr->affix = ISPELL_INVALID_INDEX; + ConfBuild->nCompoundAffix = ptr - ConfBuild->CompoundAffix + 1; + ConfBuild->CompoundAffix = (CMPDAffix *) repalloc(ConfBuild->CompoundAffix, + sizeof(CMPDAffix) * (ConfBuild->nCompoundAffix)); /* Start build a prefix tree */ - Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX); - Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX); - mkVoidAffix(Conf, true, firstsuffix); - mkVoidAffix(Conf, false, firstsuffix); + mkVoidAffix(ConfBuild, true, firstsuffix); + mkVoidAffix(ConfBuild, false, firstsuffix); + + prefix_offset = mkANode(ConfBuild, 0, firstsuffix, 0, FF_PREFIX); + suffix_offset = mkANode(ConfBuild, firstsuffix, ConfBuild->nAffix, 0, + FF_SUFFIX); + + /* Adjust offsets of new nodes for nodes of void affixes */ + voidPrefix = (AffixNode *) NodeArrayGet(&ConfBuild->PrefixNodes, 0); + voidPrefix->data[0].node_offset = prefix_offset; + + voidSuffix = (AffixNode *) NodeArrayGet(&ConfBuild->SuffixNodes, 0); + voidSuffix->data[0].node_offset = suffix_offset; } static AffixNodeData * -FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) +FindAffixes(IspellDictData *dict, AffixNode *node, const char *word, int wrdlen, + int *level, int type) { + AffixNode *node_start; AffixNodeData *StopLow, *StopHigh, *StopMiddle; uint8 symbol; + if (type == FF_PREFIX) + node_start = (AffixNode *) DictPrefixNodes(dict); + else + node_start = (AffixNode *) DictSuffixNodes(dict); + if 
(node->isvoid) { /* search void affixes */ - if (node->data->naff) + if (node->data->affstart != ISPELL_INVALID_INDEX) return node->data; - node = node->data->node; + node = (AffixNode *) DictNodeGet(node_start, node->data->node_offset); } while (node && *level < wrdlen) @@ -2019,9 +2299,10 @@ FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) if (StopMiddle->val == symbol) { (*level)++; - if (StopMiddle->naff) + if (StopMiddle->affstart != ISPELL_INVALID_INDEX) return StopMiddle; - node = StopMiddle->node; + node = (AffixNode *) DictNodeGet(node_start, + StopMiddle->node_offset); break; } else if (StopMiddle->val < symbol) @@ -2035,8 +2316,67 @@ FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type) return NULL; } +/* + * Compile regular expression on first use and store it within reg. + */ +static void +CompileAffixReg(AffixReg *reg, bool isregis, int type, + const char *mask, int masklen, MemoryContext dictCtx) +{ + MemoryContext oldcontext; + + Assert(dictCtx); + + /* + * Switch to memory context of the dictionary, so compiled expression can be + * used in other queries. 
+ */ + oldcontext = MemoryContextSwitchTo(dictCtx); + + if (isregis) + RS_compile(&reg->r.regis, (type == FF_SUFFIX), mask); + else + { + int wmasklen; + int err; + pg_wchar *wmask; + char *tmask; + + tmask = (char *) palloc(masklen + 3); + if (type == FF_SUFFIX) + sprintf(tmask, "%s$", mask); + else + sprintf(tmask, "^%s", mask); + + masklen = strlen(tmask); + wmask = (pg_wchar *) palloc((masklen + 1) * sizeof(pg_wchar)); + wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen); + + err = pg_regcomp(&(reg->r.regex), wmask, wmasklen, + REG_ADVANCED | REG_NOSUB, + DEFAULT_COLLATION_OID); + if (err) + { + char errstr[100]; + + pg_regerror(err, &(reg->r.regex), errstr, sizeof(errstr)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("invalid regular expression: %s", errstr))); + } + + pfree(wmask); + pfree(tmask); + } + + reg->iscompiled = true; + + MemoryContextSwitchTo(oldcontext); +} + static char * -CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen) +CheckAffix(const char *word, size_t len, AFFIX *Affix, AffixReg *reg, + int flagflags, char *newword, int *baselen, MemoryContext dictCtx) { /* * Check compound allow flags @@ -2076,7 +2416,7 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww if (Affix->type == FF_SUFFIX) { strcpy(newword, word); - strcpy(newword + len - Affix->replen, Affix->find); + strcpy(newword + len - Affix->replen, AffixFieldFind(Affix)); if (baselen) /* store length of non-changed part of word */ *baselen = len - Affix->replen; } @@ -2086,9 +2426,9 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww * if prefix is an all non-changed part's length then all word * contains only prefix and suffix, so out */ - if (baselen && *baselen + strlen(Affix->find) <= Affix->replen) + if (baselen && *baselen + Affix->findlen <= Affix->replen) return NULL; - strcpy(newword, Affix->find); + strcpy(newword, 
AffixFieldFind(Affix)); strcat(newword, word + Affix->replen); } @@ -2099,7 +2439,12 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww return newword; else if (Affix->isregis) { - if (RS_execute(&(Affix->reg.regis), newword)) + /* Compile the regular expression on first demand */ + if (!reg->iscompiled) + CompileAffixReg(reg, Affix->isregis, Affix->type, + AffixFieldMask(Affix), Affix->masklen, dictCtx); + + if (RS_execute(&(reg->r.regis), newword)) return newword; } else @@ -2109,12 +2454,17 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww size_t data_len; int newword_len; + /* Compile the regular expression on first demand */ + if (!reg->iscompiled) + CompileAffixReg(reg, Affix->isregis, Affix->type, + AffixFieldMask(Affix), Affix->masklen, dictCtx); + /* Convert data string to wide characters */ newword_len = strlen(newword); data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar)); data_len = pg_mb2wchar_with_len(newword, data, newword_len); - if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0))) + if (!(err = pg_regexec(&(reg->r.regex), data, data_len, 0, NULL, 0, NULL, 0))) { pfree(data); return newword; @@ -2153,7 +2503,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) char **cur; char newword[2 * MAXNORMLEN] = ""; char pnewword[2 * MAXNORMLEN] = ""; - AffixNode *snode = Conf->Suffix, + AffixNode *snode = (AffixNode *) DictSuffixNodes(Conf->dict), *pnode; int i, j; @@ -2165,7 +2515,7 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) /* Check that the word itself is normal form */ - if (FindWord(Conf, word, VoidString, flag)) + if (FindWord(Conf->dict, word, VoidString, flag)) { *cur = pstrdup(word); cur++; @@ -2173,23 +2523,29 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) } /* Find all other NORMAL forms of the 'word' (check only prefix) */ - pnode = Conf->Prefix; + pnode = (AffixNode *) DictPrefixNodes(Conf->dict); 
plevel = 0; while (pnode) { - prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); + prefix = FindAffixes(Conf->dict, pnode, word, wrdlen, &plevel, FF_PREFIX); if (!prefix) break; - for (j = 0; j < prefix->naff; j++) + for (j = prefix->affstart; j <= prefix->affend; j++) { - if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL)) + AFFIX *affix = (AFFIX *) DictAffixGet(Conf->dict, j); + AffixReg *reg = &(Conf->reg[j]); + + if (affix && + CheckAffix(word, wrdlen, affix, reg, flag, newword, NULL, + Conf->dictCtx)) { /* prefix success */ - if (FindWord(Conf, newword, prefix->aff[j]->flag, flag)) + if (FindWord(Conf->dict, newword, AffixFieldFlag(affix), flag)) cur += addToResult(forms, cur, newword); } } - pnode = prefix->node; + pnode = (AffixNode *) DictNodeGet(DictPrefixNodes(Conf->dict), + prefix->node_offset); } /* @@ -2201,45 +2557,59 @@ NormalizeSubWord(IspellDict *Conf, char *word, int flag) int baselen = 0; /* find possible suffix */ - suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); + suffix = FindAffixes(Conf->dict, snode, word, wrdlen, &slevel, + FF_SUFFIX); if (!suffix) break; /* foreach suffix check affix */ - for (i = 0; i < suffix->naff; i++) + for (i = suffix->affstart; i <= suffix->affend; i++) { - if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen)) + AFFIX *sufentry = (AFFIX *) DictAffixGet(Conf->dict, i); + AffixReg *sufreg = &(Conf->reg[i]); + + if (sufentry && + CheckAffix(word, wrdlen, sufentry, sufreg, flag, newword, &baselen, + Conf->dictCtx)) { /* suffix success */ - if (FindWord(Conf, newword, suffix->aff[i]->flag, flag)) + if (FindWord(Conf->dict, newword, AffixFieldFlag(sufentry), flag)) cur += addToResult(forms, cur, newword); /* now we will look changed word with prefixes */ - pnode = Conf->Prefix; + pnode = (AffixNode *) DictPrefixNodes(Conf->dict); plevel = 0; swrdlen = strlen(newword); while (pnode) { - prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); + prefix = 
FindAffixes(Conf->dict, pnode, newword, swrdlen, + &plevel, FF_PREFIX); if (!prefix) break; - for (j = 0; j < prefix->naff; j++) + for (j = prefix->affstart; j <= prefix->affend; j++) { - if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen)) + AFFIX *prefentry = (AFFIX *) DictAffixGet(Conf->dict, j); + AffixReg *prefreg = &(Conf->reg[j]); + + if (prefentry && + CheckAffix(newword, swrdlen, prefentry, prefreg, + flag, pnewword, &baselen, Conf->dictCtx)) { /* prefix success */ - char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ? - VoidString : prefix->aff[j]->flag; + char *ff = (prefentry->flagflags & sufentry->flagflags & FF_CROSSPRODUCT) ? + VoidString : AffixFieldFlag(prefentry); - if (FindWord(Conf, pnewword, ff, flag)) + if (FindWord(Conf->dict, pnewword, ff, flag)) cur += addToResult(forms, cur, pnewword); } } - pnode = prefix->node; + pnode = (AffixNode *) DictNodeGet(DictPrefixNodes(Conf->dict), + prefix->node_offset); } } } - snode = suffix->node; + snode = (AffixNode *) DictNodeGet(DictSuffixNodes(Conf->dict), + suffix->node_offset); } if (cur == forms) @@ -2259,7 +2629,8 @@ typedef struct SplitVar } SplitVar; static int -CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) +CheckCompoundAffixes(IspellDictData *dict, CMPDAffix **ptr, + char *word, int len, bool CheckInPlace) { bool issuffix; @@ -2269,9 +2640,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) if (CheckInPlace) { - while ((*ptr)->affix) + while ((*ptr)->affix != ISPELL_INVALID_INDEX) { - if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0) + AFFIX *affix = (AFFIX *) DictAffixGet(dict, (*ptr)->affix); + + if (len > (*ptr)->len && + strncmp(AffixFieldRepl(affix), word, (*ptr)->len) == 0) { len = (*ptr)->len; issuffix = (*ptr)->issuffix; @@ -2285,9 +2659,12 @@ CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace) { char *affbegin; - while 
((*ptr)->affix) + while ((*ptr)->affix != ISPELL_INVALID_INDEX) { - if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL) + AFFIX *affix = (AFFIX *) DictAffixGet(dict, (*ptr)->affix); + + if (len > (*ptr)->len && + (affbegin = strstr(word, AffixFieldRepl(affix))) != NULL) { len = (*ptr)->len + (affbegin - word); issuffix = (*ptr)->issuffix; @@ -2339,13 +2716,14 @@ AddStem(SplitVar *v, char *word) } static SplitVar * -SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos) +SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, + char *word, int wordlen, int startpos, int minpos) { SplitVar *var = NULL; SPNodeData *StopLow, *StopHigh, *StopMiddle = NULL; - SPNode *node = (snode) ? snode : Conf->Dictionary; + SPNode *node = (snode) ? snode : (SPNode *) DictDictNodes(Conf->dict); int level = (snode) ? minpos : startpos; /* recursive * minpos==level */ int lenaff; @@ -2360,8 +2738,11 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int while (level < wordlen) { /* find word with epenthetic or/and compound affix */ - caff = Conf->CompoundAffix; - while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0) + caff = (CMPDAffix *) DictCompoundAffix(Conf->dict); + while (level > startpos && + (lenaff = CheckCompoundAffixes(Conf->dict, &caff, + word + level, wordlen - level, + (node) ? 
true : false)) >= 0) { /* * there is one of compound affixes, so check word for existings @@ -2408,7 +2789,8 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int while (ptr->next) ptr = ptr->next; - ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff); + ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, + startpos + lenaff, startpos + lenaff); pfree(new->stem); pfree(new); @@ -2467,13 +2849,14 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int /* we can find next word */ level++; AddStem(var, pnstrdup(word + startpos, level - startpos)); - node = Conf->Dictionary; + node = (SPNode *) DictDictNodes(Conf->dict); startpos = level; continue; } } } - node = StopMiddle->node; + node = (SPNode *) DictNodeGet(DictDictNodes(Conf->dict), + StopMiddle->node_offset); } else node = NULL; @@ -2523,7 +2906,7 @@ NINormalizeWord(IspellDict *Conf, char *word) pfree(res); } - if (Conf->usecompound) + if (Conf->dict->usecompound) { int wordlen = strlen(word); SplitVar *ptr, diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h index 210f97dda9..b40cf379eb 100644 --- a/src/include/tsearch/dicts/spell.h +++ b/src/include/tsearch/dicts/spell.h @@ -18,21 +18,23 @@ #include "tsearch/dicts/regis.h" #include "tsearch/ts_public.h" +#define ISPELL_INVALID_INDEX (0x7FFFF) +#define ISPELL_INVALID_OFFSET (0xFFFFFFFF) + /* * SPNode and SPNodeData are used to represent prefix tree (Trie) to store * a words list. 
*/ -struct SPNode; - typedef struct { uint32 val:8, isword:1, /* Stores compound flags listed below */ compoundflag:4, - /* Reference to an entry of the AffixData field */ + /* Index of an entry of the AffixData field */ affix:19; - struct SPNode *node; + /* Offset to a node of the DictNodes field */ + uint32 node_offset; } SPNodeData; /* @@ -86,21 +88,55 @@ typedef struct spell_struct */ typedef struct aff_struct { - char *flag; /* FF_SUFFIX or FF_PREFIX */ - uint32 type:1, + uint16 type:1, flagflags:7, issimple:1, isregis:1, - replen:14; - char *find; - char *repl; + flaglen:2; + + /* 8 bits could be too much for repl, find and mask, but who knows */ + uint8 replen; + uint8 findlen; + uint8 masklen; + + /* + * fields stores the following data (each ends with \0): + * - repl + * - find + * - mask + * - flag - one character (if FM_CHAR), + * two characters (if FM_LONG), + * number, >= 0 and < 65536 (if FM_NUM). + */ + char fields[FLEXIBLE_ARRAY_MEMBER]; +} AFFIX; + +#define AF_FLAG_MAXSIZE 5 /* strlen(65536) */ +#define AF_REPL_MAXSIZE 255 /* 8 bits */ + +#define AFFIXHDRSZ (offsetof(AFFIX, fields)) + +#define AffixFieldRepl(af) ((af)->fields) +#define AffixFieldFind(af) ((af)->fields + (af)->replen + 1) +#define AffixFieldMask(af) (AffixFieldFind(af) + (af)->findlen + 1) +#define AffixFieldFlag(af) (AffixFieldMask(af) + (af)->masklen + 1) +#define AffixGetSize(af) (AFFIXHDRSZ + (af)->replen + 1 + (af)->findlen + 1 \ + + (af)->masklen + 1 + strlen(AffixFieldFlag(af)) + 1) + +/* + * Stores compiled regular expression of affix. AffixReg uses mask field of + * AFFIX as a regular expression. + */ +typedef struct AffixReg +{ + bool iscompiled; union { regex_t regex; Regis regis; - } reg; -} AFFIX; + } r; +} AffixReg; /* * affixes use dictionary flags too @@ -120,14 +156,13 @@ typedef struct aff_struct * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store * an affix list. 
*/ -struct AffixNode; - typedef struct { - uint32 val:8, - naff:24; - AFFIX **aff; - struct AffixNode *node; + uint8 val; + uint32 affstart; + uint32 affend; + /* Offset to a node of the PrefixNodes or SuffixNodes field */ + uint32 node_offset; } AffixNodeData; typedef struct AffixNode @@ -139,9 +174,19 @@ typedef struct AffixNode #define ANHRDSZ (offsetof(AffixNode, data)) +typedef struct NodeArray +{ + char *Nodes; + uint32 NodesSize; /* allocated size of Nodes */ + uint32 NodesEnd; /* end of data in Nodes */ +} NodeArray; + +#define NodeArrayGet(na, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL : (na)->Nodes + (of)) + typedef struct { - char *affix; + /* Index of an affix of the Affix field */ + uint32 affix; int len; bool issuffix; } CMPDAffix; @@ -176,30 +221,71 @@ typedef struct CompoundAffixFlag #define FLAGNUM_MAXSIZE (1 << 16) -typedef struct +typedef struct IspellDictData { - int maffixes; - int naffixes; - AFFIX *Affix; - - AffixNode *Suffix; - AffixNode *Prefix; + FlagMode flagMode; + bool usecompound; - SPNode *Dictionary; - /* Array of sets of affixes */ - char **AffixData; - int lenAffixData; - int nAffixData; bool useFlagAliases; - CMPDAffix *CompoundAffix; + uint32 nAffixData; + uint32 AffixDataStart; - bool usecompound; - FlagMode flagMode; + uint32 AffixOffsetStart; + uint32 AffixStart; + uint32 nAffix; + + uint32 DictNodesStart; + uint32 PrefixNodesStart; + uint32 SuffixNodesStart; + + uint32 CompoundAffixStart; /* - * All follow fields are actually needed only for initialization + * data stores: + * - AffixData - array of affix sets + * - Affix - sorted array of affixes + * - DictNodes - prefix tree of a word list + * - PrefixNodes - prefix tree of a prefix list + * - SuffixNodes - prefix tree of a suffix list + * - CompoundAffix - array of compound affixes */ + char data[FLEXIBLE_ARRAY_MEMBER]; +} IspellDictData; + +#define IspellDictDataHdrSize (offsetof(IspellDictData, data)) + +#define DictAffixDataOffset(d) ((d)->data) +#define 
DictAffixData(d) ((d)->data + (d)->AffixDataStart) +#define DictAffixDataGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL : \ + DictAffixData(d) + ((uint32 *) DictAffixDataOffset(d))[i]) + +#define DictAffixOffset(d) ((d)->data + (d)->AffixOffsetStart) +#define DictAffix(d) ((d)->data + (d)->AffixStart) +#define DictAffixGet(d, i) (((i) == ISPELL_INVALID_INDEX) ? NULL : \ + DictAffix(d) + ((uint32 *) DictAffixOffset(d))[i]) + +#define DictDictNodes(d) ((d)->data + (d)->DictNodesStart) +#define DictPrefixNodes(d) ((d)->data + (d)->PrefixNodesStart) +#define DictSuffixNodes(d) ((d)->data + (d)->SuffixNodesStart) +#define DictNodeGet(node_start, of) (((of) == ISPELL_INVALID_OFFSET) ? NULL : \ + (char *) (node_start) + (of)) + +#define DictCompoundAffix(d) ((d)->data + (d)->CompoundAffixStart) + +/* + * IspellDictBuild is used to initialize IspellDictData struct. This is a + * temporary structure which is set up by NIStartBuild() and released by + * NIFinishBuild(). + */ +typedef struct IspellDictBuild +{ + MemoryContext buildCxt; /* temp context for construction */ + + IspellDictData *dict; + uint32 dict_size; + + /* Temporary data */ /* Array of Hunspell options in affix file */ CompoundAffixFlag *CompoundAffixFlags; @@ -208,29 +294,73 @@ typedef struct /* allocated length of CompoundAffixFlags array */ int mCompoundAffixFlag; - /* - * Remaining fields are only used during dictionary construction; they are - * set up by NIStartBuild and cleared by NIFinishBuild. 
- */ - MemoryContext buildCxt; /* temp context for construction */ - - /* Temporary array of all words in the dict file */ + /* Array of all words in the dict file */ SPELL **Spell; - int nspell; /* number of valid entries in Spell array */ - int mspell; /* allocated length of Spell array */ + int nSpell; /* number of valid entries in Spell array */ + int mSpell; /* allocated length of Spell array */ + + /* Array of all affixes in the aff file */ + AFFIX **Affix; + int nAffix; /* number of valid entries in Affix array */ + int mAffix; /* allocated length of Affix array */ + uint32 AffixSize; + + /* Data for IspellDictData */ + + /* Array of sets of affixes */ + uint32 *AffixDataOffset; + int nAffixData; /* number of affix sets */ + int mAffixData; /* allocated number of affix sets */ + char *AffixData; + uint32 AffixDataSize; /* allocated size of AffixData */ + uint32 AffixDataEnd; /* end of data in AffixData */ + + /* Prefix tree which stores a word list */ + NodeArray DictNodes; + + /* Prefix tree which stores a prefix list */ + NodeArray PrefixNodes; + + /* Prefix tree which stores a suffix list */ + NodeArray SuffixNodes; - /* These are used to allocate "compact" data without palloc overhead */ - char *firstfree; /* first free address (always maxaligned) */ - size_t avail; /* free space remaining at firstfree */ + /* Array of compound affixes */ + CMPDAffix *CompoundAffix; + int nCompoundAffix; /* number of entries of CompoundAffix */ +} IspellDictBuild; + +#define AffixDataGet(d, i) ((d)->AffixData + (d)->AffixDataOffset[i]) + +/* + * IspellDict is used within NINormalizeWord. + */ +typedef struct IspellDict +{ + /* + * Pointer to a DSM location of IspellDictData. Should be retrieved per + * every dispell_lexize() call. + */ + IspellDictData *dict; + /* + * Array of regular expressions of affixes. Each regular expression is + * compiled only on demand. + */ + AffixReg *reg; + /* + * Memory context for compiling regular expressions. 
+ */ + MemoryContext dictCtx; } IspellDict; extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word); -extern void NIStartBuild(IspellDict *Conf); -extern void NIImportAffixes(IspellDict *Conf, const char *filename); -extern void NIImportDictionary(IspellDict *Conf, const char *filename); -extern void NISortDictionary(IspellDict *Conf); -extern void NISortAffixes(IspellDict *Conf); -extern void NIFinishBuild(IspellDict *Conf); +extern void NIStartBuild(IspellDictBuild *ConfBuild); +extern void NIImportAffixes(IspellDictBuild *ConfBuild, const char *filename); +extern void NIImportDictionary(IspellDictBuild *ConfBuild, + const char *filename); +extern void NISortDictionary(IspellDictBuild *ConfBuild); +extern void NISortAffixes(IspellDictBuild *ConfBuild); +extern void NICopyData(IspellDictBuild *ConfBuild); +extern void NIFinishBuild(IspellDictBuild *ConfBuild); #endif