From: Angelos Karageorgiou <angelos(at)incredible(dot)com>
To: pgsql-hackers(at)hub(dot)org
Subject:
Date: 1999-02-10 19:15:45
Message-ID: 199902101915.VAA07013@awesome.incredible.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

I am using postgres 6.4.2 on BSD/OS 3.1 with a Greek locale that I
have developed. I had assumed that regexes with postgres would not work
because of something I did, but a posting from another fellow from Sweden
gave me a clue that the problem must be with the regex package and not the
locale.

So I investigated the code and found out that pg_isdigit(int ch),
pg_isalpha(int ch) and the associated functions do a comparison of
characters as ints. I changed a few crucial points with a cast to
(unsigned char) and voila, regexes in Greek with full locale support. My
guess is that a character held as an int does not compare equal to the same
character as an unsigned char: 8-bit characters above 127 sign-extend to
negative values, so the sign bit is probably the culprit.

Please test the patch with some other languages too; Swedish or Finnish
would be a nice touch.

Patch follows, but it is trivial really.
---------------------------------------------------------------------------------
*** regcomp.c Tue Sep 1 07:31:25 1998
--- regcomp.c.patched Wed Feb 10 19:57:11 1999
***************
*** 1038,1046 ****
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
! return tolower(ch);
else if (pg_islower(ch))
! return toupper(ch);
else
/* peculiar, but could happen */
return ch;
--- 1038,1046 ----
{
assert(pg_isalpha(ch));
if (pg_isupper(ch))
! return tolower((unsigned char)ch);
else if (pg_islower(ch))
! return toupper((unsigned char)ch);
else
/* peculiar, but could happen */
return ch;
***************
*** 1055,1067 ****
static void
bothcases(p, ch)
struct parse *p;
! int ch;
{
pg_wchar *oldnext = p->next;
pg_wchar *oldend = p->end;
pg_wchar bracket[3];

! assert(othercase(ch) != ch);/* p_bracket() would recurse */
p->next = bracket;
p->end = bracket + 2;
bracket[0] = ch;
--- 1055,1067 ----
static void
bothcases(p, ch)
struct parse *p;
! int ch;
{
pg_wchar *oldnext = p->next;
pg_wchar *oldend = p->end;
pg_wchar bracket[3];

! assert(othercase(ch) != (unsigned char)ch);/* p_bracket() would recurse */
p->next = bracket;
p->end = bracket + 2;
bracket[0] = ch;
***************
*** 1084,1090 ****
{
cat_t *cap = p->g->categories;

! if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch)
bothcases(p, ch);
else
{
--- 1084,1090 ----
{
cat_t *cap = p->g->categories;

! if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != (unsigned char)ch)
bothcases(p, ch);
else
{
***************
*** 1862,1868 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! return (isdigit(c));
#endif
}

--- 1862,1868 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isdigit(c));
#else
! return (isdigit((unsigned char)c));
#endif
}

***************
*** 1872,1878 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
#else
! return (isalpha(c));
#endif
}

--- 1872,1878 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isalpha(c));
#else
! return (isalpha((unsigned char)c));
#endif
}

***************
*** 1882,1888 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isupper(c));
#else
! return (isupper(c));
#endif
}

--- 1882,1888 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && isupper(c));
#else
! return (isupper((unsigned char)c));
#endif
}

***************
*** 1892,1897 ****
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && islower(c));
#else
! return (islower(c));
#endif
}
--- 1892,1897 ----
#ifdef MULTIBYTE
return (c >= 0 && c <= UCHAR_MAX && islower(c));
#else
! return (islower((unsigned char)c));
#endif
}

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Jackson, DeJuan 1999-02-10 19:45:53 Possible bug on update
Previous Message Michael Meskes 1999-02-10 18:30:34 Re: [HACKERS] Keywords