--- postgresql-7.4.5/src/backend/utils/adt/oracle_compat.c	2003-08-08 23:42:06.000000000 +0200
+++ postgresql-8.0.0beta1/src/backend/utils/adt/oracle_compat.c	2004-06-07 00:17:01.000000000 +0200
@@ -9,23 +9,145 @@
  *
  *
  * IDENTIFICATION
- *	$Header: /cvsroot/pgsql-server/src/backend/utils/adt/oracle_compat.c,v 1.48 2003/08/08 21:42:06 momjian Exp $
+ *	$PostgreSQL: pgsql-server/src/backend/utils/adt/oracle_compat.c,v 1.53 2004/06/06 22:17:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include <ctype.h>
+#include <limits.h>
+/*
+ * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
+ * declare them in <wchar.h>.
+ */
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif
 
 #include "utils/builtins.h"
 #include "mb/pg_wchar.h"
 
 
+/*
+ * If the system provides the needed functions for wide-character manipulation
+ * (which are all standardized by C99), then we implement upper/lower/initcap
+ * using wide-character functions.  Otherwise we use the traditional <ctype.h>
+ * functions, which of course will not work as desired in multibyte character
+ * sets.  Note that in either case we are effectively assuming that the
+ * database character encoding matches the encoding implied by LC_CTYPE.
+ *
+ * We assume if we have these two functions, we have their friends too, and
+ * can use the wide-character method.
+ */
+#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
+#define USE_WIDE_UPPER_LOWER
+#endif
+
 static text *dotrim(const char *string, int stringlen,
 	   const char *set, int setlen,
 	   bool doltrim, bool dortrim);
 
 
+#ifdef USE_WIDE_UPPER_LOWER
+
+/*
+ * Convert a TEXT value into a palloc'd, zero-terminated wchar_t string.
+ * Raises ERROR if the input is too long or mbstowcs() rejects it.
+ */
+static wchar_t *
+texttowcs(const text *txt)
+{
+	int			nbytes = VARSIZE(txt) - VARHDRSZ;
+	char	   *workstr;
+	wchar_t	   *result;
+	size_t		ncodes;
+
+	/* Reject sizes whose wchar_t workspace would overflow an int */
+	if (nbytes < 0 ||
+		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+
+	/* mbstowcs needs null-terminated input (NOTE(review): never pfree'd) */
+	workstr = (char *) palloc(nbytes + 1);
+	memcpy(workstr, VARDATA(txt), nbytes);
+	workstr[nbytes] = '\0';
+
+	/* At most one wide code per input byte, plus room for the terminator */
+	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+
+	/* Convert; (size_t) -1 means an invalid multibyte sequence was found */
+	ncodes = mbstowcs(result, workstr, nbytes + 1);
+
+	if (ncodes == (size_t) -1)
+	{
+		/*
+		 * Invalid multibyte character encountered.  Let pg_verifymbstr
+		 * check the string first so we can give a useful error message.
+		 * If it finds nothing wrong, the string is OK to us but not OK to
+		 * mbstowcs --- this suggests that the LC_CTYPE locale is different
+		 * from the database encoding, so give a generic error message.
+		 */
+		pg_verifymbstr(workstr, nbytes, false);
+		ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				 errmsg("invalid multibyte character for locale")));
+	}
+
+	Assert(ncodes <= (size_t) nbytes);
+
+	return result;
+}
+
+
+/*
+ * Convert a zero-terminated wchar string into a palloc'd TEXT value.  The
+ * caller must also pass the string length in wide codes (it knows it anyway
+ * in current uses); ERROR is raised on size overflow or wcstombs() failure.
+ */
+static text *
+wcstotext(const wchar_t *str, int ncodes)
+{
+	text	   *result;
+	size_t		nbytes;
+
+	/* Reject lengths whose worst-case byte size would overflow an int */
+	if (ncodes < 0 ||
+		ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+
+	/* Worst case: MB_CUR_MAX bytes per code (and terminator), plus header */
+	result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
+
+	/* Convert straight into the data area; (size_t) -1 signals failure */
+	nbytes = wcstombs((char *) VARDATA(result), str,
+					  (ncodes + 1) * MB_CUR_MAX);
+
+	if (nbytes == (size_t) -1)
+	{
+		/* Invalid multibyte character encountered ... shouldn't happen */
+		ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				 errmsg("invalid multibyte character for locale")));
+	}
+
+	Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
+
+	VARATT_SIZEP(result) = nbytes + VARHDRSZ;
+
+	return result;
+}
+
+#endif /* USE_WIDE_UPPER_LOWER */
+
+
 /********************************************************************
  *
  * lower
@@ -43,21 +165,45 @@
 Datum
 lower(PG_FUNCTION_ARGS)
 {
-	text	   *string = PG_GETARG_TEXT_P_COPY(0);
-	char	   *ptr;
-	int			m;
+#ifdef USE_WIDE_UPPER_LOWER
+	/* multibyte encoding: convert via wchar_t; else use the bytewise loop */
+	if (pg_database_encoding_max_length() > 1)
+	{
+		text	   *string = PG_GETARG_TEXT_P(0);
+		text	   *result;
+		wchar_t	   *workspace;
+		int			i;
 
-	/* Since we copied the string, we can scribble directly on the value */
-	ptr = VARDATA(string);
-	m = VARSIZE(string) - VARHDRSZ;
+		workspace = texttowcs(string);
 
-	while (m-- > 0)
-	{
-		*ptr = tolower((unsigned char) *ptr);
-		ptr++;
+		for (i = 0; workspace[i] != 0; i++)
+			workspace[i] = towlower(workspace[i]);
+
+		result = wcstotext(workspace, i);
+
+		pfree(workspace);
+
+		PG_RETURN_TEXT_P(result);
 	}
+	else
+#endif /* USE_WIDE_UPPER_LOWER */
+	{
+		text	   *string = PG_GETARG_TEXT_P_COPY(0);
+		char	   *ptr;
+		int			m;
+
+		/* PG_GETARG_TEXT_P_COPY gave us a copy, so we can modify it in place */
+		ptr = VARDATA(string);
+		m = VARSIZE(string) - VARHDRSZ;
+
+		while (m-- > 0)
+		{
+			*ptr = tolower((unsigned char) *ptr);
+			ptr++;
+		}
 
-	PG_RETURN_TEXT_P(string);
+		PG_RETURN_TEXT_P(string);
+	}
 }
 
 
@@ -78,21 +224,45 @@
 Datum
 upper(PG_FUNCTION_ARGS)
 {
-	text	   *string = PG_GETARG_TEXT_P_COPY(0);
-	char	   *ptr;
-	int			m;
+#ifdef USE_WIDE_UPPER_LOWER
+	/* multibyte encoding: convert via wchar_t; else use the bytewise loop */
+	if (pg_database_encoding_max_length() > 1)
+	{
+		text	   *string = PG_GETARG_TEXT_P(0);
+		text	   *result;
+		wchar_t	   *workspace;
+		int			i;
 
-	/* Since we copied the string, we can scribble directly on the value */
-	ptr = VARDATA(string);
-	m = VARSIZE(string) - VARHDRSZ;
+		workspace = texttowcs(string);
 
-	while (m-- > 0)
-	{
-		*ptr = toupper((unsigned char) *ptr);
-		ptr++;
+		for (i = 0; workspace[i] != 0; i++)
+			workspace[i] = towupper(workspace[i]);
+
+		result = wcstotext(workspace, i);
+
+		pfree(workspace);
+
+		PG_RETURN_TEXT_P(result);
 	}
+	else
+#endif /* USE_WIDE_UPPER_LOWER */
+	{
+		text	   *string = PG_GETARG_TEXT_P_COPY(0);
+		char	   *ptr;
+		int			m;
+
+		/* PG_GETARG_TEXT_P_COPY gave us a copy, so we can modify it in place */
+		ptr = VARDATA(string);
+		m = VARSIZE(string) - VARHDRSZ;
+
+		while (m-- > 0)
+		{
+			*ptr = toupper((unsigned char) *ptr);
+			ptr++;
+		}
 
-	PG_RETURN_TEXT_P(string);
+		PG_RETURN_TEXT_P(string);
+	}
 }
 
 
@@ -106,41 +276,67 @@
  *
  * Purpose:
  *
- *	 Returns string, with first letter of each word in uppercase,
- *	 all other letters in lowercase. A word is delimited by white
- *	 space.
+ *	 Returns string, with first letter of each word in uppercase, all
+ *	 other letters in lowercase. A word is defined as a sequence of
+ *	 alphanumeric characters, delimited by non-alphanumeric
+ *	 characters.
  *
  ********************************************************************/
 
 Datum
 initcap(PG_FUNCTION_ARGS)
 {
-	text	   *string = PG_GETARG_TEXT_P_COPY(0);
-	char	   *ptr;
-	int			m;
+#ifdef USE_WIDE_UPPER_LOWER
+	/* multibyte encoding: convert via wchar_t; else use the bytewise loop */
+	if (pg_database_encoding_max_length() > 1)
+	{
+		text	   *string = PG_GETARG_TEXT_P(0);
+		text	   *result;
+		wchar_t	   *workspace;
+		int			wasalnum = 0;
+		int			i;
 
-	/* Since we copied the string, we can scribble directly on the value */
-	ptr = VARDATA(string);
-	m = VARSIZE(string) - VARHDRSZ;
+		workspace = texttowcs(string);
 
-	if (m > 0)
-	{
-		*ptr = toupper((unsigned char) *ptr);
-		ptr++;
-		m--;
-	}
+		for (i = 0; workspace[i] != 0; i++)
+		{
+			if (wasalnum)
+				workspace[i] = towlower(workspace[i]);
+			else
+				workspace[i] = towupper(workspace[i]);
+			wasalnum = iswalnum(workspace[i]);
+		}
 
-	while (m-- > 0)
-	{
-		/* Oracle capitalizes after all non-alphanumeric */
-		if (!isalnum((unsigned char) ptr[-1]))
-			*ptr = toupper((unsigned char) *ptr);
-		else
-			*ptr = tolower((unsigned char) *ptr);
-		ptr++;
+		result = wcstotext(workspace, i);
+
+		pfree(workspace);
+
+		PG_RETURN_TEXT_P(result);
 	}
+	else
+#endif /* USE_WIDE_UPPER_LOWER */
+	{
+		text	   *string = PG_GETARG_TEXT_P_COPY(0);
+		int			wasalnum = 0;
+		char	   *ptr;
+		int			m;
+
+		/* Private copy: upper-case word starts, lower-case the rest, in place */
+		ptr = VARDATA(string);
+		m = VARSIZE(string) - VARHDRSZ;
+
+		while (m-- > 0)
+		{
+			if (wasalnum)
+				*ptr = tolower((unsigned char) *ptr);
+			else
+				*ptr = toupper((unsigned char) *ptr);
+			wasalnum = isalnum((unsigned char) *ptr);
+			ptr++;
+		}
 
-	PG_RETURN_TEXT_P(string);
+		PG_RETURN_TEXT_P(string);
+	}
 }
 
 
@@ -872,7 +1068,7 @@
  ********************************************************************/
 
 Datum
-chr			(PG_FUNCTION_ARGS)
+chr(PG_FUNCTION_ARGS)
 {
 	int32		cvalue = PG_GETARG_INT32(0);
 	text	   *result;
--- postgresql-7.4.5/configure.in	2004-08-18 05:11:25.000000000 +0200
+++ postgresql-8.0.0beta1/configure.in	2004-08-09 01:27:11.000000000 +0200
@@ -866,7 +810,7 @@
 # SunOS doesn't handle negative byte comparisons properly with +/- return
 AC_FUNC_MEMCMP

-AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getpeereid memmove poll pstat setproctitle setsid sigprocmask symlink sysconf utime utimes waitpid])
+AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getpeereid memmove poll pstat setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs])

 AC_CHECK_DECLS(fdatasync, [], [], [#include <unistd.h>])


---  postgresql-7.4.5/src/include/pg_config.h.in~	2004-03-20 16:39:40.000000000 +0100
+++  postgresql-7.4.5/src/include/pg_config.h.in	2004-08-26 13:18:28.000000000 +0200
@@ -509,6 +509,9 @@
    `HAVE_STRUCT_TM_TM_ZONE' instead. */
 #undef HAVE_TM_ZONE
 
+/* Define to 1 if you have the `towlower' function. */
+#undef HAVE_TOWLOWER
+
 /* Define to 1 if you have the external array `tzname'. */
 #undef HAVE_TZNAME
 
@@ -542,6 +545,9 @@
 /* Define to 1 if you have the `waitpid' function. */
 #undef HAVE_WAITPID
 
+/* Define to 1 if you have the `wcstombs' function. */
+#undef HAVE_WCSTOMBS
+
 /* Define to the appropriate snprintf format for 64-bit ints, if any. */
 #undef INT64_FORMAT