Skip site navigation (1) Skip section navigation (2)

Re: [PORTS] Locale bug

From: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
To: Andriy I Pilipenko <bamby(at)marka(dot)net(dot)ua>
Cc: pgsql-patches(at)postgresql(dot)org, pgsql-ports(at)postgresql(dot)org
Subject: Re: [PORTS] Locale bug
Date: 2000-06-12 23:53:48
Message-ID: 200006122353.TAA10428@candle.pha.pa.us (view raw or flat)
Thread:
Lists: pgsql-patches, pgsql-ports
Can someone comment on this?

> ============================================================================
>                         POSTGRESQL BUG REPORT TEMPLATE
> ============================================================================
> 
> 
> Your name		:	Andriy I Pilipenko
> Your email address	:	bamby(at)marka(dot)net(dot)ua
> 
> 
> System Configuration
> ---------------------
>   Architecture (example: Intel Pentium)  	:  Intel Pentium
> 
>   Operating System (example: Linux 2.0.26 ELF) 	:  FreeBSD 3.x, 4.0
> 
>   PostgreSQL version (example: PostgreSQL-6.5.1):  PostgreSQL-6.5.3,
>                                                    PostgreSQL-7.0.beta5
> 
>   Compiler used (example:  gcc 2.8.0)		:  gcc 2.7.2.2, gcc 2.9.5
> 
> 
> Please enter a FULL description of your problem:
> ------------------------------------------------
> 
> There is a bug in PostgreSQL that affects at least FreeBSD. If Postgres is
> configured with locale support but without multibyte support, one cannot
> perform a case-insensitive search using national language characters.
> The problem comes from the declaration of pg_wchar as char in non-multibyte
> mode. Character values above 127 are treated as negative values, and this
> results in improper return values from functions such as isalpha(),
> isupper(), etc. Declaring pg_wchar as unsigned char eliminates this problem.
> 
> This problem does not exist on Linux. On that system, functions like
> isalpha(), isupper(), etc. successfully accept negative values as well as
> their positive counterparts.
> 
> 
> Please describe a way to repeat the problem.   Please try to provide a
> concise reproducible example, if at all possible: 
> ----------------------------------------------------------------------
> 
> Compile and install Postgres with locale support enabled and multibyte
> support disabled on FreeBSD. Create a table with a field of some character
> type. Put in the table a couple of records containing some character with a
> code above 127, in lower and upper case. Try a query like this:
> 
>   SELECT * FROM table WHERE field ~* '<the_character>'
> 
> where <the_character> is the mentioned character. You will receive only
> the one record whose character is exactly the same as in the query.
> 
> 
> If you know how this problem might be fixed, list the solution below:
> ---------------------------------------------------------------------
> 
> Here is the patch. I tried it on FreeBSD and Linux with success. This
> patch applies to PostgreSQL 6.5.3 and 7.0.beta5.
> 
> 
> Index: postgres/src/backend/regex/engine.c
> diff -c postgres/src/backend/regex/engine.c:1.1.1.1 postgres/src/backend/regex/engine.c:1.2
> *** postgres/src/backend/regex/engine.c:1.1.1.1	Tue Apr 18 21:45:09 2000
> --- postgres/src/backend/regex/engine.c	Wed Apr 19 09:46:38 2000
> ***************
> *** 123,130 ****
>   #define NONCHAR(c)	  ((c) > 16777216)	/* 16777216 == 2^24 == 3 bytes */
>   #define NNONCHAR  (CODEMAX-16777216)
>   #else
> ! #define NONCHAR(c)		  ((c) > CHAR_MAX)
> ! #define NNONCHAR	  (CODEMAX-CHAR_MAX)
>   #endif
>   
>   #ifdef REDEBUG
> --- 123,130 ----
>   #define NONCHAR(c)	  ((c) > 16777216)	/* 16777216 == 2^24 == 3 bytes */
>   #define NNONCHAR  (CODEMAX-16777216)
>   #else
> ! #define NONCHAR(c)		  ((c) > UCHAR_MAX)
> ! #define NNONCHAR	  (CODEMAX-UCHAR_MAX)
>   #endif
>   
>   #ifdef REDEBUG
> ***************
> *** 958,965 ****
>    == #define		BOW		(BOL+4)
>    == #define		EOW		(BOL+5)
>    == #define		CODEMAX (BOL+5)			// highest code used
> !  == #define		NONCHAR(c)		((c) > CHAR_MAX)
> !  == #define		NNONCHAR		(CODEMAX-CHAR_MAX)
>    */
>   static states
>   step(g, start, stop, bef, ch, aft)
> --- 958,965 ----
>    == #define		BOW		(BOL+4)
>    == #define		EOW		(BOL+5)
>    == #define		CODEMAX (BOL+5)			// highest code used
> !  == #define		NONCHAR(c)		((c) > UCHAR_MAX)
> !  == #define		NNONCHAR		(CODEMAX-UCHAR_MAX)
>    */
>   static states
>   step(g, start, stop, bef, ch, aft)
> Index: postgres/src/backend/regex/regcomp.c
> diff -c postgres/src/backend/regex/regcomp.c:1.1.1.1 postgres/src/backend/regex/regcomp.c:1.2
> *** postgres/src/backend/regex/regcomp.c:1.1.1.1	Tue Apr 18 21:45:09 2000
> --- postgres/src/backend/regex/regcomp.c	Wed Apr 19 09:46:38 2000
> ***************
> *** 97,107 ****
>   	static void p_b_eclass(struct parse * p, cset *cs);
>   	static pg_wchar p_b_symbol(struct parse * p);
>   	static char p_b_coll_elem(struct parse * p, int endc);
> - #ifdef MULTIBYTE
>   	static unsigned char othercase(int ch);
> - #else
> - 	static char othercase(int ch);
> - #endif
>   	static void bothcases(struct parse * p, int ch);
>   	static void ordinary(struct parse * p, int ch);
>   	static void nonnewline(struct parse * p);
> --- 97,103 ----
> ***************
> *** 224,232 ****
>   			return REG_INVARG;
>   		len = preg->re_endp - wcp;
>   #else
> ! 		if (preg->re_endp < pattern)
>   			return REG_INVARG;
> ! 		len = preg->re_endp - pattern;
>   #endif
>   	}
>   	else
> --- 220,228 ----
>   			return REG_INVARG;
>   		len = preg->re_endp - wcp;
>   #else
> ! 		if (preg->re_endp < (pg_wchar *) pattern)
>   			return REG_INVARG;
> ! 		len = preg->re_endp - (pg_wchar *) pattern;
>   #endif
>   	}
>   	else
> ***************
> *** 1038,1071 ****
>    - othercase - return the case counterpart of an alphabetic
>    == static char othercase(int ch);
>    */
> - #ifdef MULTIBYTE
>   static unsigned char			/* if no counterpart, return ch */
> - #else
> - static char						/* if no counterpart, return ch */
> - #endif
>   othercase(ch)
>   int			ch;
>   {
>   	assert(pg_isalpha(ch));
>   	if (pg_isupper(ch))
> - #ifdef MULTIBYTE
> - 		return (unsigned char) tolower(ch);
> - #else
>   		return tolower(ch);
> - #endif
>   	else if (pg_islower(ch))
> - #ifdef MULTIBYTE
> - 		return (unsigned char) toupper(ch);
> - #else
>   		return toupper(ch);
> - #endif
>   	else
>   /* peculiar, but could happen */
> - #ifdef MULTIBYTE
> - 		return (unsigned char) ch;
> - #else
>   		return ch;
> - #endif
>   }
>   
>   /*
> --- 1034,1051 ----
> Index: postgres/src/include/mb/pg_wchar.h
> diff -c postgres/src/include/mb/pg_wchar.h:1.1.1.1 postgres/src/include/mb/pg_wchar.h:1.2
> *** postgres/src/include/mb/pg_wchar.h:1.1.1.1	Tue Apr 18 21:45:31 2000
> --- postgres/src/include/mb/pg_wchar.h	Wed Apr 19 09:46:42 2000
> ***************
> *** 34,40 ****
>   typedef unsigned int pg_wchar;
>   
>   #else
> ! #define pg_wchar char
>   #endif
>   
>   /*
> --- 34,40 ----
>   typedef unsigned int pg_wchar;
>   
>   #else
> ! typedef unsigned char pg_wchar;
>   #endif
>   
>   /*
> Index: postgres/src/include/regex/regex2.h
> diff -c postgres/src/include/regex/regex2.h:1.1.1.1 postgres/src/include/regex/regex2.h:1.2
> *** postgres/src/include/regex/regex2.h:1.1.1.1	Tue Apr 18 21:45:35 2000
> --- postgres/src/include/regex/regex2.h	Wed Apr 19 09:46:47 2000
> ***************
> *** 201,207 ****
>   #ifdef MULTIBYTE
>   #define OUT		  (16777216+1)	/* 16777216 == 2^24 == 3 bytes */
>   #else
> ! #define OUT		  (CHAR_MAX+1)	/* a non-character value */
>   #endif
>   
>   #ifdef MULTIBYTE
> --- 201,207 ----
>   #ifdef MULTIBYTE
>   #define OUT		  (16777216+1)	/* 16777216 == 2^24 == 3 bytes */
>   #else
> ! #define OUT		  (UCHAR_MAX+1)	/* a non-character value */
>   #endif
>   
>   #ifdef MULTIBYTE
> 
> 


-- 
  Bruce Momjian                        |  http://www.op.net/~candle
  pgman(at)candle(dot)pha(dot)pa(dot)us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

In response to

  • Locale bug at 2000-04-19 09:21:48 from Andriy I Pilipenko

pgsql-ports by date

Next: From: luc00, Date: 2000-06-13 07:48:35
Subject: SELF Installing NT pgsql ?
Previous: From: Bruce Momjian, Date: 2000-06-12 15:31:16
Subject: Re: Installation instructions for NT?

pgsql-patches by date

Next: From: Bruce Momjian, Date: 2000-06-13 00:03:35
Subject: Re: libpq++ update
Previous: From: Tom Lane, Date: 2000-06-12 21:34:07
Subject: Re: Caching number of blocks in relation to avoi lseek.

Privacy Policy | About PostgreSQL
Copyright © 1996-2014 The PostgreSQL Global Development Group