diff -Naur postgresql-8.3beta2-orig/doc/src/sgml/textsearch.sgml postgresql-8.3beta2/doc/src/sgml/textsearch.sgml
--- postgresql-8.3beta2-orig/doc/src/sgml/textsearch.sgml 2007-10-27 02:19:45.000000000 +0200
+++ postgresql-8.3beta2/doc/src/sgml/textsearch.sgml 2007-11-14 03:35:48.000000000 +0100
@@ -2090,9 +2090,10 @@
The simple> dictionary template operates by converting the
input token to lower case and checking it against a file of stop words.
- If it is found in the file then NULL> is returned, causing
- the token to be discarded. If not, the lower-cased form of the word
- is returned as the normalized lexeme.
+ If it is found in the file then an empty array is returned. If not, the
+ return value depends on the configuration. The default is to return the
+ lower-cased form of the word, but one might choose to
+ return NULL> instead.
@@ -2135,6 +2136,34 @@
+
+ We can also choose to return NULL> instead of the lower-cased
+ lexeme if it is not found in the stop words file. This can be useful if
+ we just want to pass the unchanged lexeme to another dictionary instead
+ of reporting it as recognized. We can control this behavior through
+ the AcceptAll> parameter. Valid values for this parameter
+ are true> and false>; the default
+ is true>.
+
+
+
+ Using the same configuration as in the previous example:
+
+
+ALTER TEXT SEARCH DICTIONARY public.simple_dict ( AcceptAll = false );
+
+SELECT ts_lexize('public.simple_dict','YeS');
+ ts_lexize
+-----------
+
+
+SELECT ts_lexize('public.simple_dict','The');
+ ts_lexize
+-----------
+ {}
+
+
+
Most types of dictionaries rely on configuration files, such as files of
diff -Naur postgresql-8.3beta2-orig/src/backend/tsearch/dict_simple.c postgresql-8.3beta2/src/backend/tsearch/dict_simple.c
--- postgresql-8.3beta2-orig/src/backend/tsearch/dict_simple.c 2007-08-25 02:03:59.000000000 +0200
+++ postgresql-8.3beta2/src/backend/tsearch/dict_simple.c 2007-11-14 12:17:05.000000000 +0100
@@ -23,6 +23,7 @@
typedef struct
{
StopList stoplist;
+ bool acceptAll; /* true: return non-stop words as lexemes; false: return NULL for them */
} DictSimple;
@@ -31,9 +32,12 @@
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictSimple *d = (DictSimple *) palloc0(sizeof(DictSimple));
- bool stoploaded = false;
+ bool stoploaded = false,
+ acceptloaded = false;
ListCell *l;
+ d->acceptAll = true;
+
foreach(l, dictoptions)
{
DefElem *defel = (DefElem *) lfirst(l);
@@ -47,6 +51,18 @@
readstoplist(defGetString(defel), &d->stoplist, lowerstr);
stoploaded = true;
}
+ else if (pg_strcasecmp("AcceptAll", defel->defname) == 0)
+ {
+ if (acceptloaded)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("multiple AcceptAll parameters")));
+ if (defGetBoolean(defel))
+ d->acceptAll = true;
+ else
+ d->acceptAll = false;
+ acceptloaded = true;
+ }
else
{
ereport(ERROR,
@@ -71,9 +87,18 @@
txt = lowerstr_with_len(in, len);
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
+ {
pfree(txt);
+ PG_RETURN_POINTER(res);
+ }
else
- res[0].lexeme = txt;
-
- PG_RETURN_POINTER(res);
+ {
+ if (d->acceptAll)
+ {
+ res[0].lexeme = txt;
+ PG_RETURN_POINTER(res);
+ }
+ else
+ PG_RETURN_POINTER(NULL);
+ }
}