diff -Naur postgresql-8.3beta2-orig/doc/src/sgml/textsearch.sgml postgresql-8.3beta2/doc/src/sgml/textsearch.sgml --- postgresql-8.3beta2-orig/doc/src/sgml/textsearch.sgml 2007-10-27 02:19:45.000000000 +0200 +++ postgresql-8.3beta2/doc/src/sgml/textsearch.sgml 2007-11-14 03:35:48.000000000 +0100 @@ -2090,9 +2090,10 @@ The simple dictionary template operates by converting the input token to lower case and checking it against a file of stop words. - If it is found in the file then NULL is returned, causing - the token to be discarded. If not, the lower-cased form of the word - is returned as the normalized lexeme. + If it is found in the file then an empty array is returned. If not, the + return value depends on the configuration. The default is to return the + lower-cased form of the word, but one might choose to + return NULL instead. @@ -2135,6 +2136,34 @@ + + We can also choose to return NULL instead of the lower-cased + lexeme if it is not found in the stop words file. This can be useful if + we just want to pass the unchanged lexeme to another dictionary instead + of reporting it as recognized. We can control this behavior through + the AcceptAll parameter. Correct values for this parameter + are true and false; the default + is true. 
+ + + + Using the same configuration as in the previous example: + + +ALTER TEXT SEARCH DICTIONARY public.simple_dict ( AcceptAll = false ); + +SELECT ts_lexize('public.simple_dict','YeS'); + ts_lexize +----------- + + +SELECT ts_lexize('public.simple_dict','The'); + ts_lexize +----------- + {} + + + Most types of dictionaries rely on configuration files, such as files of diff -Naur postgresql-8.3beta2-orig/src/backend/tsearch/dict_simple.c postgresql-8.3beta2/src/backend/tsearch/dict_simple.c --- postgresql-8.3beta2-orig/src/backend/tsearch/dict_simple.c 2007-08-25 02:03:59.000000000 +0200 +++ postgresql-8.3beta2/src/backend/tsearch/dict_simple.c 2007-11-14 12:17:05.000000000 +0100 @@ -23,6 +23,7 @@ typedef struct { StopList stoplist; + bool acceptAll; } DictSimple; @@ -31,9 +32,12 @@ { List *dictoptions = (List *) PG_GETARG_POINTER(0); DictSimple *d = (DictSimple *) palloc0(sizeof(DictSimple)); - bool stoploaded = false; + bool stoploaded = false, + acceptloaded = false; ListCell *l; + d->acceptAll = true; + foreach(l, dictoptions) { DefElem *defel = (DefElem *) lfirst(l); @@ -47,6 +51,18 @@ readstoplist(defGetString(defel), &d->stoplist, lowerstr); stoploaded = true; } + else if (pg_strcasecmp("AcceptAll", defel->defname) == 0) + { + if (acceptloaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple AcceptAll parameters"))); + if (defGetBoolean(defel)) + d->acceptAll = true; + else + d->acceptAll = false; + acceptloaded = true; + } else { ereport(ERROR, @@ -71,9 +87,18 @@ txt = lowerstr_with_len(in, len); if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) + { pfree(txt); + PG_RETURN_POINTER(res); + } else - res[0].lexeme = txt; - - PG_RETURN_POINTER(res); + { + if (d->acceptAll) + { + res[0].lexeme = txt; + PG_RETURN_POINTER(res); + } + else + PG_RETURN_POINTER(NULL); + } }