Re: [PORTS] Locale bug

From: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
To: Andriy I Pilipenko <bamby(at)marka(dot)net(dot)ua>
Cc: pgsql-patches(at)postgresql(dot)org, pgsql-ports(at)postgresql(dot)org
Subject: Re: [PORTS] Locale bug
Date: 2000-09-30 02:42:12
Message-ID: 200009300242.WAA03359@candle.pha.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-patches pgsql-ports

Can someone comment on this? As I remember, there was a problem with
this patch on other platforms.

> ============================================================================
> POSTGRESQL BUG REPORT TEMPLATE
> ============================================================================
>
>
> Your name : Andriy I Pilipenko
> Your email address : bamby(at)marka(dot)net(dot)ua
>
>
> System Configuration
> ---------------------
> Architecture (example: Intel Pentium) : Intel Pentium
>
> Operating System (example: Linux 2.0.26 ELF) : FreeBSD 3.x, 4.0
>
> PostgreSQL version (example: PostgreSQL-6.5.1): PostgreSQL-6.5.3,
> PostgreSQL-7.0.beta5
>
> Compiler used (example: gcc 2.8.0) : gcc 2.7.2.2, gcc 2.9.5
>
>
> Please enter a FULL description of your problem:
> ------------------------------------------------
>
> There is at least one FreeBSD-specific bug in PostgreSQL. If Postgres is
> configured with locale support but without multibyte support, one cannot
> perform a case-insensitive search using national language characters.
> The problem comes from the declaration of pg_wchar as char in non-multibyte
> mode. Character values above 127 are treated as negative values, and this
> results in improper return values from functions such as isalpha(),
> isupper(), etc. Declaring pg_wchar as unsigned char eliminates this problem.
>
> This problem does not exist on Linux. On that system, functions like
> isalpha(), isupper(), etc. successfully accept negative values as well as
> their positive counterparts.
>
>
> Please describe a way to repeat the problem. Please try to provide a
> concise reproducible example, if at all possible:
> ----------------------------------------------------------------------
>
> Compile and install postgres with locale support enabled and multibyte
> support disabled on FreeBSD. Create a table with a field of some character
> type. Put a couple of records into the table containing some character with
> a code above 127, in lower and upper case. Try a query like this:
>
> SELECT * FROM table WHERE field ~* '<the_character>'
>
> where <the_character> is the mentioned character. You will receive only
> the one record whose character is exactly the same as in the query.
>
>
> If you know how this problem might be fixed, list the solution below:
> ---------------------------------------------------------------------
>
> Here is the patch. I tried it on FreeBSD and Linux with success. This
> patch applies to PostgreSQL 6.5.3 and 7.0.beta5.
>
>
> Index: postgres/src/backend/regex/engine.c
> diff -c postgres/src/backend/regex/engine.c:1.1.1.1 postgres/src/backend/regex/engine.c:1.2
> *** postgres/src/backend/regex/engine.c:1.1.1.1 Tue Apr 18 21:45:09 2000
> --- postgres/src/backend/regex/engine.c Wed Apr 19 09:46:38 2000
> ***************
> *** 123,130 ****
> #define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */
> #define NNONCHAR (CODEMAX-16777216)
> #else
> ! #define NONCHAR(c) ((c) > CHAR_MAX)
> ! #define NNONCHAR (CODEMAX-CHAR_MAX)
> #endif
>
> #ifdef REDEBUG
> --- 123,130 ----
> #define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */
> #define NNONCHAR (CODEMAX-16777216)
> #else
> ! #define NONCHAR(c) ((c) > UCHAR_MAX)
> ! #define NNONCHAR (CODEMAX-UCHAR_MAX)
> #endif
>
> #ifdef REDEBUG
> ***************
> *** 958,965 ****
> == #define BOW (BOL+4)
> == #define EOW (BOL+5)
> == #define CODEMAX (BOL+5) // highest code used
> ! == #define NONCHAR(c) ((c) > CHAR_MAX)
> ! == #define NNONCHAR (CODEMAX-CHAR_MAX)
> */
> static states
> step(g, start, stop, bef, ch, aft)
> --- 958,965 ----
> == #define BOW (BOL+4)
> == #define EOW (BOL+5)
> == #define CODEMAX (BOL+5) // highest code used
> ! == #define NONCHAR(c) ((c) > UCHAR_MAX)
> ! == #define NNONCHAR (CODEMAX-UCHAR_MAX)
> */
> static states
> step(g, start, stop, bef, ch, aft)
> Index: postgres/src/backend/regex/regcomp.c
> diff -c postgres/src/backend/regex/regcomp.c:1.1.1.1 postgres/src/backend/regex/regcomp.c:1.2
> *** postgres/src/backend/regex/regcomp.c:1.1.1.1 Tue Apr 18 21:45:09 2000
> --- postgres/src/backend/regex/regcomp.c Wed Apr 19 09:46:38 2000
> ***************
> *** 97,107 ****
> static void p_b_eclass(struct parse * p, cset *cs);
> static pg_wchar p_b_symbol(struct parse * p);
> static char p_b_coll_elem(struct parse * p, int endc);
> - #ifdef MULTIBYTE
> static unsigned char othercase(int ch);
> - #else
> - static char othercase(int ch);
> - #endif
> static void bothcases(struct parse * p, int ch);
> static void ordinary(struct parse * p, int ch);
> static void nonnewline(struct parse * p);
> --- 97,103 ----
> ***************
> *** 224,232 ****
> return REG_INVARG;
> len = preg->re_endp - wcp;
> #else
> ! if (preg->re_endp < pattern)
> return REG_INVARG;
> ! len = preg->re_endp - pattern;
> #endif
> }
> else
> --- 220,228 ----
> return REG_INVARG;
> len = preg->re_endp - wcp;
> #else
> ! if (preg->re_endp < (pg_wchar *) pattern)
> return REG_INVARG;
> ! len = preg->re_endp - (pg_wchar *) pattern;
> #endif
> }
> else
> ***************
> *** 1038,1071 ****
> - othercase - return the case counterpart of an alphabetic
> == static char othercase(int ch);
> */
> - #ifdef MULTIBYTE
> static unsigned char /* if no counterpart, return ch */
> - #else
> - static char /* if no counterpart, return ch */
> - #endif
> othercase(ch)
> int ch;
> {
> assert(pg_isalpha(ch));
> if (pg_isupper(ch))
> - #ifdef MULTIBYTE
> - return (unsigned char) tolower(ch);
> - #else
> return tolower(ch);
> - #endif
> else if (pg_islower(ch))
> - #ifdef MULTIBYTE
> - return (unsigned char) toupper(ch);
> - #else
> return toupper(ch);
> - #endif
> else
> /* peculiar, but could happen */
> - #ifdef MULTIBYTE
> - return (unsigned char) ch;
> - #else
> return ch;
> - #endif
> }
>
> /*
> --- 1034,1051 ----
> Index: postgres/src/include/mb/pg_wchar.h
> diff -c postgres/src/include/mb/pg_wchar.h:1.1.1.1 postgres/src/include/mb/pg_wchar.h:1.2
> *** postgres/src/include/mb/pg_wchar.h:1.1.1.1 Tue Apr 18 21:45:31 2000
> --- postgres/src/include/mb/pg_wchar.h Wed Apr 19 09:46:42 2000
> ***************
> *** 34,40 ****
> typedef unsigned int pg_wchar;
>
> #else
> ! #define pg_wchar char
> #endif
>
> /*
> --- 34,40 ----
> typedef unsigned int pg_wchar;
>
> #else
> ! typedef unsigned char pg_wchar;
> #endif
>
> /*
> Index: postgres/src/include/regex/regex2.h
> diff -c postgres/src/include/regex/regex2.h:1.1.1.1 postgres/src/include/regex/regex2.h:1.2
> *** postgres/src/include/regex/regex2.h:1.1.1.1 Tue Apr 18 21:45:35 2000
> --- postgres/src/include/regex/regex2.h Wed Apr 19 09:46:47 2000
> ***************
> *** 201,207 ****
> #ifdef MULTIBYTE
> #define OUT (16777216+1) /* 16777216 == 2^24 == 3 bytes */
> #else
> ! #define OUT (CHAR_MAX+1) /* a non-character value */
> #endif
>
> #ifdef MULTIBYTE
> --- 201,207 ----
> #ifdef MULTIBYTE
> #define OUT (16777216+1) /* 16777216 == 2^24 == 3 bytes */
> #else
> ! #define OUT (UCHAR_MAX+1) /* a non-character value */
> #endif
>
> #ifdef MULTIBYTE
>
>

--
Bruce Momjian | http://candle.pha.pa.us
pgman(at)candle(dot)pha(dot)pa(dot)us | (610) 853-3000
+ If your life is a hard drive, | 830 Blythe Avenue
+ Christ can be your backup. | Drexel Hill, Pennsylvania 19026

In response to

  • Locale bug at 2000-04-19 09:21:48 from Andriy I Pilipenko

Browse pgsql-patches by date

  From Date Subject
Next Message Denis Perchine 2000-10-01 10:51:59 Patch to support transactions with BLOBs
Previous Message Bruce Momjian 2000-09-29 22:00:36 Re: AIX patch to fix problems with new fmgr

Browse pgsql-ports by date

  From Date Subject
Next Message Adriaan Joubert 2000-10-02 05:21:45 Re: Alpha spinlock
Previous Message Tom Lane 2000-09-29 15:45:47 Re: Strange error message