From a9d5926b68eb6e0e726b7c9838f6ea8b3b22a157 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 15 Aug 2017 14:31:39 -0400 Subject: [PATCH] doc: Document TR 35 collation options for ICU --- doc/src/sgml/charset.sgml | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 48ecfc5f48..7bb645a39f 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -709,6 +709,58 @@ ICU collations will draw an error along the lines of collation "de-x-icu" for encoding "WIN874" does not exist. + + + ICU allows collations to be customized beyond the basic + language/country/type set that is preloaded by initdb. + Users are encouraged to define their own collation objects that make use + of these facilities to suit the sorting behavior to their requirements. + Here are some examples: + + + CREATE COLLATION digitslast (provider = icu, locale = 'en-u-kr-latn-digit'); + + + Sort digits after letters. (The default is digits before letters.) + + + + + + CREATE COLLATION upperfirst (provider = icu, locale = 'en-u-kf-upper'); + + + Sort upper-case letters before lower-case letters. (The default is + lower-case letters first.) + + + + + + CREATE COLLATION special (provider = icu, locale = 'en-u-kf-upper-kr-latn-digit'); + + + Combines both of the above options. + + + + + + + See Unicode + Technical Standard #35 + and BCP 47 for + details. + + + + Note that while this system allows creating collations that ignore + case or ignore accents or similar (using + the ks key), PostgreSQL does not at the moment allow + such collations to act in a truly case- or accent-insensitive manner. Any + strings that compare equal according to the collation but are not + byte-wise equal will be sorted according to their byte values. + -- 2.14.1