Locale bug

From: Andriy I Pilipenko <bamby(at)marka(dot)net(dot)ua>
To: pgsql-patches(at)postgresql(dot)org, pgsql-ports(at)postgresql(dot)org
Subject: Locale bug
Date: 2000-04-19 09:21:48
Message-ID: Pine.BSF.4.21.0004191118560.87161-100000@bamby.marka.net.ua
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-patches pgsql-ports

============================================================================
POSTGRESQL BUG REPORT TEMPLATE
============================================================================

Your name : Andriy I Pilipenko
Your email address : bamby(at)marka(dot)net(dot)ua

System Configuration
---------------------
Architecture (example: Intel Pentium) : Intel Pentium

Operating System (example: Linux 2.0.26 ELF) : FreeBSD 3.x, 4.0

PostgreSQL version (example: PostgreSQL-6.5.1): PostgreSQL-6.5.3,
PostgreSQL-7.0.beta5

Compiler used (example: gcc 2.8.0) : gcc 2.7.2.2, gcc 2.9.5

Please enter a FULL description of your problem:
------------------------------------------------

There is a bug in PostgreSQL that shows up at least on FreeBSD. If Postgres
is configured with locale support but without multibyte support, one cannot
perform a case-insensitive search using national language characters.
The problem comes from the declaration of pg_wchar as char in non-multibyte
mode. Character values above 127 are treated as negative values, and this
results in improper return values from functions such as isalpha(),
isupper(), etc. Declaring pg_wchar as unsigned char eliminates this problem.

This problem does not exist on Linux. On that system, functions like
isalpha(), isupper(), etc. successfully accept negative values as well as
their positive counterparts.

Please describe a way to repeat the problem. Please try to provide a
concise reproducible example, if at all possible:
----------------------------------------------------------------------

Compile and install postgres with locale support enabled and multibyte
support disabled on FreeBSD. Create a table with a field of some character
type. Put a couple of records into the table containing some character with
a code above 127, in lower and upper case. Try a query like this:

SELECT * FROM table WHERE field ~* '<the_character>'

where <the_character> is the character mentioned above. You will receive
only one record: the one whose character is exactly the same as in the query.

If you know how this problem might be fixed, list the solution below:
---------------------------------------------------------------------

Here is the patch. I tried it on FreeBSD and Linux with success. This
patch applies to PostgreSQL 6.5.3 and 7.0.beta5.

Index: postgres/src/backend/regex/engine.c
diff -c postgres/src/backend/regex/engine.c:1.1.1.1 postgres/src/backend/regex/engine.c:1.2
*** postgres/src/backend/regex/engine.c:1.1.1.1 Tue Apr 18 21:45:09 2000
--- postgres/src/backend/regex/engine.c Wed Apr 19 09:46:38 2000
***************
*** 123,130 ****
#define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */
#define NNONCHAR (CODEMAX-16777216)
#else
! #define NONCHAR(c) ((c) > CHAR_MAX)
! #define NNONCHAR (CODEMAX-CHAR_MAX)
#endif

#ifdef REDEBUG
--- 123,130 ----
#define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */
#define NNONCHAR (CODEMAX-16777216)
#else
! #define NONCHAR(c) ((c) > UCHAR_MAX)
! #define NNONCHAR (CODEMAX-UCHAR_MAX)
#endif

#ifdef REDEBUG
***************
*** 958,965 ****
== #define BOW (BOL+4)
== #define EOW (BOL+5)
== #define CODEMAX (BOL+5) // highest code used
! == #define NONCHAR(c) ((c) > CHAR_MAX)
! == #define NNONCHAR (CODEMAX-CHAR_MAX)
*/
static states
step(g, start, stop, bef, ch, aft)
--- 958,965 ----
== #define BOW (BOL+4)
== #define EOW (BOL+5)
== #define CODEMAX (BOL+5) // highest code used
! == #define NONCHAR(c) ((c) > UCHAR_MAX)
! == #define NNONCHAR (CODEMAX-UCHAR_MAX)
*/
static states
step(g, start, stop, bef, ch, aft)
Index: postgres/src/backend/regex/regcomp.c
diff -c postgres/src/backend/regex/regcomp.c:1.1.1.1 postgres/src/backend/regex/regcomp.c:1.2
*** postgres/src/backend/regex/regcomp.c:1.1.1.1 Tue Apr 18 21:45:09 2000
--- postgres/src/backend/regex/regcomp.c Wed Apr 19 09:46:38 2000
***************
*** 97,107 ****
static void p_b_eclass(struct parse * p, cset *cs);
static pg_wchar p_b_symbol(struct parse * p);
static char p_b_coll_elem(struct parse * p, int endc);
- #ifdef MULTIBYTE
static unsigned char othercase(int ch);
- #else
- static char othercase(int ch);
- #endif
static void bothcases(struct parse * p, int ch);
static void ordinary(struct parse * p, int ch);
static void nonnewline(struct parse * p);
--- 97,103 ----
***************
*** 224,232 ****
return REG_INVARG;
len = preg->re_endp - wcp;
#else
! if (preg->re_endp < pattern)
return REG_INVARG;
! len = preg->re_endp - pattern;
#endif
}
else
--- 220,228 ----
return REG_INVARG;
len = preg->re_endp - wcp;
#else
! if (preg->re_endp < (pg_wchar *) pattern)
return REG_INVARG;
! len = preg->re_endp - (pg_wchar *) pattern;
#endif
}
else
***************
*** 1038,1071 ****
- othercase - return the case counterpart of an alphabetic
== static char othercase(int ch);
*/
- #ifdef MULTIBYTE
static unsigned char /* if no counterpart, return ch */
- #else
- static char /* if no counterpart, return ch */
- #endif
othercase(ch)
int ch;
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
- #ifdef MULTIBYTE
- return (unsigned char) tolower(ch);
- #else
return tolower(ch);
- #endif
else if (pg_islower(ch))
- #ifdef MULTIBYTE
- return (unsigned char) toupper(ch);
- #else
return toupper(ch);
- #endif
else
/* peculiar, but could happen */
- #ifdef MULTIBYTE
- return (unsigned char) ch;
- #else
return ch;
- #endif
}

/*
--- 1034,1051 ----
Index: postgres/src/include/mb/pg_wchar.h
diff -c postgres/src/include/mb/pg_wchar.h:1.1.1.1 postgres/src/include/mb/pg_wchar.h:1.2
*** postgres/src/include/mb/pg_wchar.h:1.1.1.1 Tue Apr 18 21:45:31 2000
--- postgres/src/include/mb/pg_wchar.h Wed Apr 19 09:46:42 2000
***************
*** 34,40 ****
typedef unsigned int pg_wchar;

#else
! #define pg_wchar char
#endif

/*
--- 34,40 ----
typedef unsigned int pg_wchar;

#else
! typedef unsigned char pg_wchar;
#endif

/*
Index: postgres/src/include/regex/regex2.h
diff -c postgres/src/include/regex/regex2.h:1.1.1.1 postgres/src/include/regex/regex2.h:1.2
*** postgres/src/include/regex/regex2.h:1.1.1.1 Tue Apr 18 21:45:35 2000
--- postgres/src/include/regex/regex2.h Wed Apr 19 09:46:47 2000
***************
*** 201,207 ****
#ifdef MULTIBYTE
#define OUT (16777216+1) /* 16777216 == 2^24 == 3 bytes */
#else
! #define OUT (CHAR_MAX+1) /* a non-character value */
#endif

#ifdef MULTIBYTE
--- 201,207 ----
#ifdef MULTIBYTE
#define OUT (16777216+1) /* 16777216 == 2^24 == 3 bytes */
#else
! #define OUT (UCHAR_MAX+1) /* a non-character value */
#endif

#ifdef MULTIBYTE

Responses

Browse pgsql-patches by date

  From Date Subject
Next Message Tom Vijlbrief 2000-04-20 15:11:22 libpq++ update
Previous Message Lamar Owen 2000-04-06 19:14:01 Re: pg_dumplo, thanks :) (fwd)

Browse pgsql-ports by date

  From Date Subject
Next Message John Boris 2000-04-19 17:12:03 Problem compiling 7.0beta5 on SCO Openserver5.0.5
Previous Message Lamar Owen 2000-04-18 17:29:28 7.0RC1-0.6 RPM's now available.