diff --git a/contrib/dict_int/dict_int.c b/contrib/dict_int/dict_int.c index 56ede37089..8dd4959028 100644 --- a/contrib/dict_int/dict_int.c +++ b/contrib/dict_int/dict_int.c @@ -30,7 +30,7 @@ PG_FUNCTION_INFO_V1(dintdict_lexize); Datum dintdict_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictInt *d; ListCell *l; @@ -38,7 +38,7 @@ dintdict_init(PG_FUNCTION_ARGS) d->maxlen = 6; d->rejectlong = false; - foreach(l, dictoptions) + foreach(l, init_data->dict_options) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c index a79ece240c..0b8a32d459 100644 --- a/contrib/dict_xsyn/dict_xsyn.c +++ b/contrib/dict_xsyn/dict_xsyn.c @@ -140,7 +140,7 @@ read_dictionary(DictSyn *d, const char *filename) Datum dxsyn_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictSyn *d; ListCell *l; char *filename = NULL; @@ -153,7 +153,7 @@ dxsyn_init(PG_FUNCTION_ARGS) d->matchsynonyms = false; d->keepsynonyms = true; - foreach(l, dictoptions) + foreach(l, init_data->dict_options) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c index 247c202755..2a2fbee5fa 100644 --- a/contrib/unaccent/unaccent.c +++ b/contrib/unaccent/unaccent.c @@ -267,12 +267,12 @@ PG_FUNCTION_INFO_V1(unaccent_init); Datum unaccent_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); TrieChar *rootTrie = NULL; bool fileloaded = false; ListCell *l; - foreach(l, dictoptions) + foreach(l, init_data->dict_options) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c index 3a843512d1..83012b5b54 100644 --- a/src/backend/commands/tsearchcmds.c 
+++ b/src/backend/commands/tsearchcmds.c @@ -386,17 +386,24 @@ verify_dictoptions(Oid tmplId, List *dictoptions) } else { + DictInitData init_data; + /* * Copy the options just in case init method thinks it can scribble on * them ... */ dictoptions = copyObject(dictoptions); + init_data.dict_options = dictoptions; + init_data.dict.id = InvalidOid; + init_data.dict.xmin = InvalidTransactionId; + ItemPointerSetInvalid(&init_data.dict.tid); + /* * Call the init method and see if it complains. We don't worry about * it leaking memory, since our command will soon be over anyway. */ - (void) OidFunctionCall1(initmethod, PointerGetDatum(dictoptions)); + (void) OidFunctionCall1(initmethod, PointerGetDatum(&init_data)); } ReleaseSysCache(tup); diff --git a/src/backend/snowball/dict_snowball.c b/src/backend/snowball/dict_snowball.c index 78c9f73ef0..15ebafd833 100644 --- a/src/backend/snowball/dict_snowball.c +++ b/src/backend/snowball/dict_snowball.c @@ -181,14 +181,14 @@ locate_stem_module(DictSnowball *d, const char *lang) Datum dsnowball_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictSnowball *d; bool stoploaded = false; ListCell *l; d = (DictSnowball *) palloc0(sizeof(DictSnowball)); - foreach(l, dictoptions) + foreach(l, init_data->dict_options) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index edc6547700..39f1e6faeb 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -29,7 +29,7 @@ typedef struct Datum dispell_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictISpell *d; bool affloaded = false, dictloaded = false, @@ -40,7 +40,7 @@ dispell_init(PG_FUNCTION_ARGS) NIStartBuild(&(d->obj)); - foreach(l, dictoptions) + foreach(l, init_data->dict_options) { 
DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/tsearch/dict_simple.c b/src/backend/tsearch/dict_simple.c index ac6a24eba5..9605108334 100644 --- a/src/backend/tsearch/dict_simple.c +++ b/src/backend/tsearch/dict_simple.c @@ -29,7 +29,7 @@ typedef struct Datum dsimple_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictSimple *d = (DictSimple *) palloc0(sizeof(DictSimple)); bool stoploaded = false, acceptloaded = false; @@ -37,7 +37,7 @@ dsimple_init(PG_FUNCTION_ARGS) d->accept = true; /* default */ - foreach(l, dictoptions) + foreach(l, init_data->dict_options) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c index c011886cb0..02989cd16b 100644 --- a/src/backend/tsearch/dict_synonym.c +++ b/src/backend/tsearch/dict_synonym.c @@ -91,7 +91,7 @@ compareSyn(const void *a, const void *b) Datum dsynonym_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictSyn *d; ListCell *l; char *filename = NULL; @@ -104,7 +104,7 @@ dsynonym_init(PG_FUNCTION_ARGS) char *line = NULL; uint16 flags = 0; - foreach(l, dictoptions) + foreach(l, init_data->dict_options) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c index 24364e646d..1604b5f60f 100644 --- a/src/backend/tsearch/dict_thesaurus.c +++ b/src/backend/tsearch/dict_thesaurus.c @@ -604,7 +604,7 @@ compileTheSubstitute(DictThesaurus *d) Datum thesaurus_init(PG_FUNCTION_ARGS) { - List *dictoptions = (List *) PG_GETARG_POINTER(0); + DictInitData *init_data = (DictInitData *) PG_GETARG_POINTER(0); DictThesaurus *d; char *subdictname = NULL; bool fileloaded = false; @@ -612,7 +612,7 @@ thesaurus_init(PG_FUNCTION_ARGS) d = (DictThesaurus *) palloc0(sizeof(DictThesaurus)); - 
foreach(l, dictoptions) + foreach(l, init_data->dict_options) { DefElem *defel = (DefElem *) lfirst(l); diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index 97347780d3..dea8c99c31 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -39,6 +39,7 @@ #include "catalog/pg_ts_template.h" #include "commands/defrem.h" #include "tsearch/ts_cache.h" +#include "tsearch/ts_public.h" #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/fmgroids.h" @@ -312,11 +313,14 @@ lookup_ts_dictionary_cache(Oid dictId) MemSet(entry, 0, sizeof(TSDictionaryCacheEntry)); entry->dictId = dictId; entry->dictCtx = saveCtx; + entry->dict_xmin = HeapTupleHeaderGetRawXmin(tpdict->t_data); + entry->dict_tid = tpdict->t_self; entry->lexizeOid = template->tmpllexize; if (OidIsValid(template->tmplinit)) { + DictInitData init_data; List *dictoptions; Datum opt; bool isnull; @@ -336,9 +340,14 @@ lookup_ts_dictionary_cache(Oid dictId) else dictoptions = deserialize_deflist(opt); + init_data.dict_options = dictoptions; + init_data.dict.id = dictId; + init_data.dict.xmin = entry->dict_xmin; + init_data.dict.tid = entry->dict_tid; + entry->dictData = DatumGetPointer(OidFunctionCall1(template->tmplinit, - PointerGetDatum(dictoptions))); + PointerGetDatum(&init_data))); MemoryContextSwitchTo(oldcontext); } diff --git a/src/include/tsearch/ts_cache.h b/src/include/tsearch/ts_cache.h index 410f1d54af..f7d80a0853 100644 --- a/src/include/tsearch/ts_cache.h +++ b/src/include/tsearch/ts_cache.h @@ -54,6 +54,9 @@ typedef struct TSDictionaryCacheEntry Oid dictId; bool isvalid; + TransactionId dict_xmin; /* XID of the dictionary's tuple */ + ItemPointerData dict_tid; /* TID of the dictionary's tuple */ + /* most frequent fmgr call */ Oid lexizeOid; FmgrInfo lexize; diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h index 0b7a5aa68e..cb3a152d45 100644 --- a/src/include/tsearch/ts_public.h +++ 
b/src/include/tsearch/ts_public.h @@ -13,6 +13,8 @@ #ifndef _PG_TS_PUBLIC_H_ #define _PG_TS_PUBLIC_H_ +#include "nodes/pg_list.h" +#include "storage/itemptr.h" #include "tsearch/ts_type.h" /* @@ -81,10 +83,68 @@ extern void readstoplist(const char *fname, StopList *s, extern bool searchstoplist(StopList *s, char *key); /* - * Interface with dictionaries + * API for text search dictionaries. + * + * API functions to handle a text search dictionary are defined by a text search + * template. Currently an existing template cannot be altered in order to + * define other functions. The API consists of the following functions: + * - init function - optional function which initializes internal structures of + * the dictionary. It accepts DictInitData structure as an argument and must + * return a custom palloc'd structure which stores content of the processed + * dictionary and is used in lexize function. + * - lexize function - normalizes a single word (token) using specific + * dictionary. It must return a palloc'd array of TSLexeme the last entry of + * which is the terminating entry and accepts the following arguments: + * - dictData - pointer to a custom structure returned by init function or + * NULL if init function wasn't defined by the template. + * - token - string which represents a token to normalize, isn't + * null-terminated. + * - length - length of token. + * - dictState - pointer to a DictSubState structure which stores current + * state of processing a set of tokens and allows normalizing phrases. + */ + +/* + * A preprocessed dictionary can be stored in shared memory using DSM. The init + * function decides whether the dictionary wants it. A DSM segment is released + * if the dictionary was altered or dropped. But still there is a situation + * when we have no way to prevent a segment leak.
It may happen if the + * dictionary was dropped after some backend had already used it: that + * backend will hold its DSM segment until it disconnects or calls + * lookup_ts_dictionary_cache(), where the invalid segment is unpinned. + * + * DictPointerData is a structure to search a dictionary's DSM segment. We need + * xmin and tid to be sure that the content in the DSM segment is still valid. + */ +typedef struct +{ + Oid id; /* OID of dictionary which is processed */ + TransactionId xmin; /* XID of the dictionary's tuple */ + ItemPointerData tid; /* TID of the dictionary's tuple */ +} DictPointerData; + +/* + * API structure for a dictionary initialization. It is passed as an argument + * to a template's init function. */ +typedef struct +{ + /* + * A dictionary option list for a template's init method. Should go first + * for backward compatibility. + */ + List *dict_options; + /* + * Dictionary information used to allocate, search and release its DSM + * segment. + */ + DictPointerData dict; +} DictInitData; -/* return struct for any lexize function */ +/* + * Return struct for any lexize function. They are combined into an array, the + * last entry is the terminating entry. + */ typedef struct { /*---------- @@ -108,7 +168,8 @@ typedef struct uint16 flags; /* See flag bits below */ - char *lexeme; /* C string */ + char *lexeme; /* C string or NULL if it is a terminating + * entry */ } TSLexeme; /* Flag bits that can appear in TSLexeme.flags */