commit 17d0abf (HEAD) Author: Noah Misch AuthorDate: Fri Jul 3 19:59:50 2015 -0400 Commit: Noah Misch CommitDate: Fri Jul 3 19:59:50 2015 -0400 Revoke support for strxfrm() that write past the specified array length. This formalizes a decision implicit in commit 4ea51cdfe85ceef8afabceb03c446574daa0ac23 and adds clean detection of affected systems. Vendor updates are available for each such known bug. Back-patch to 9.5, where the aforementioned commit first appeared. diff --git a/src/backend/main/main.c b/src/backend/main/main.c index 2ecadd6..4fad6f3 100644 --- a/src/backend/main/main.c +++ b/src/backend/main/main.c @@ -149,6 +149,8 @@ main(int argc, char *argv[]) */ unsetenv("LC_ALL"); + check_strxfrm_bug(); + /* * Catch standard options before doing much else, in particular before we * insist on not being root. diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 84215e0..d91959e 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -855,6 +855,64 @@ IsoLocaleName(const char *winlocname) /* + * Detect aging strxfrm() implementations that, in a subset of locales, write + * past the specified buffer length. Affected users must update OS packages + * before using PostgreSQL 9.5 or later. + * + * Assume that the bug can come and go from one postmaster startup to another + * due to physical replication among diverse machines. Assume that the bug's + * presence will not change during the life of a particular postmaster. Given + * those assumptions, call this no less than once per postmaster startup per + * LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so + * there is no need to consider pg_collation locales. + */ +void +check_strxfrm_bug(void) +{ + char buf[32]; + const int canary = 0x7F; + bool ok = true; + + /* + * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10 + * 05/08 returns 18 and modifies 10 bytes. It respects limits above or + * below that range. + * + * The bug is present in Solaris 8 as well; it is absent in Solaris 10 + * 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1, + * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales + * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C. + */ + buf[7] = canary; + (void) strxfrm(buf, "ab", 7); + if (buf[7] != canary) + ok = false; + + /* + * illumos bug #1594 was present in the source tree from 2010-10-11 to + * 2012-02-01. Given an ASCII string of any length and length limit 1, + * affected systems ignore the length limit and modify a number of bytes + * one less than the return value. The problem inputs for this bug do not + * overlap those for the Solaris bug, hence a distinct test. + * + * Affected systems include smartos-20110926T021612Z. Affected locales + * include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C. + */ + buf[1] = canary; + (void) strxfrm(buf, "a", 1); + if (buf[1] != canary) + ok = false; + + if (!ok) + ereport(ERROR, + (errcode(ERRCODE_SYSTEM_ERROR), + errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length", + setlocale(LC_COLLATE, NULL)), + errhint("Apply system library package updates."))); +} + + +/* * Cache mechanism for collation information. * * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 04ed07b..64b6ae4 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3932,16 +3932,8 @@ convert_string_datum(Datum value, Oid typid) size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY; /* - * Note: originally we guessed at a suitable output buffer size, and - * only needed to call strxfrm twice if our guess was too small. - * However, it seems that some versions of Solaris have buggy strxfrm - * that can write past the specified buffer length in that scenario. - * So, do it the dumb way for portability. - * - * Yet other systems (e.g., glibc) sometimes return a smaller value - * from the second call than the first; thus the Assert must be <= not - * == as you'd expect. Can't any of these people program their way - * out of a paper bag? + * XXX: We could guess at a suitable output buffer size and only call + * strxfrm twice if our guess is too small. * * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return * bogus data or set an error. This is not really a problem unless it @@ -3974,6 +3966,11 @@ convert_string_datum(Datum value, Oid typid) #endif xfrmstr = (char *) palloc(xfrmlen + 1); xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1); + + /* + * Some systems (e.g., glibc) can return a smaller value from the + * second call than the first; thus the Assert must be <= not ==. + */ Assert(xfrmlen2 <= xfrmlen); pfree(val); val = xfrmstr; diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 0f04f28..7b19714 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -391,6 +391,8 @@ CheckMyDatabase(const char *name, bool am_superuser) SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE); SetConfigOption("lc_ctype", ctype, PGC_INTERNAL, PGC_S_OVERRIDE); + check_strxfrm_bug(); + ReleaseSysCache(tup); } diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 3b56138..8e91033 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -44,6 +44,7 @@ extern void assign_locale_time(const char *newval, void *extra); extern bool check_locale(int category, const char *locale, char **canonname); extern char *pg_perm_setlocale(int category, const char *locale); +extern void check_strxfrm_bug(void); extern bool lc_collate_is_c(Oid collation); extern bool lc_ctype_is_c(Oid collation);