Re: regexp character class locale awareness patch

From: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
To: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
Cc: Manuel Sugawara <masm(at)fciencias(dot)unam(dot)mx>, Peter Eisentraut <peter_e(at)gmx(dot)net>, Tatsuo Ishii <t-ishii(at)sra(dot)co(dot)jp>, pgsql-hackers(at)postgresql(dot)org
Subject: Re: regexp character class locale awareness patch
Date: 2002-04-17 21:55:00
Message-ID: 200204172155.g3HLt0n07537@candle.pha.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers


Your patch has been added to the PostgreSQL unapplied patches list at:

http://candle.pha.pa.us/cgi-bin/pgpatches

I will try to apply it within the next 48 hours.

---------------------------------------------------------------------------

Bruce Momjian wrote:
> Manuel Sugawara wrote:
> > Peter Eisentraut <peter_e(at)gmx(dot)net> writes:
> > >
> > > Basically, you manually preprocess the patch to include the
> > > USE_LOCALE branch and remove the non-USE_LOCALE branch.
> >
> > Yeah, that should work. You may also remove include/regex/cclass.h
> > since it will not be used any more.
> >
> > > However, if the no-locale branches have significant performance
> > > benefits then it might be worth pondering setting up some
> > > optimizations.
> >
> > This is not the case: the no-locale branches have no significant
> > performance benefits.
>
> Here is a patch based on this discussion.
>
> --
> Bruce Momjian | http://candle.pha.pa.us
> pgman(at)candle(dot)pha(dot)pa(dot)us | (610) 853-3000
> + If your life is a hard drive, | 830 Blythe Avenue
> + Christ can be your backup. | Drexel Hill, Pennsylvania 19026

> Index: src/backend/regex/regcomp.c
> ===================================================================
> RCS file: /cvsroot/pgsql/src/backend/regex/regcomp.c,v
> retrieving revision 1.28
> diff -c -r1.28 regcomp.c
> *** src/backend/regex/regcomp.c 28 Oct 2001 06:25:49 -0000 1.28
> --- src/backend/regex/regcomp.c 16 Apr 2002 23:12:38 -0000
> ***************
> *** 47,53 ****
> #include "regex/regex.h"
> #include "regex/utils.h"
> #include "regex/regex2.h"
> ! #include "regex/cclass.h"
> #include "regex/cname.h"
>
> /*
> --- 47,60 ----
> #include "regex/regex.h"
> #include "regex/utils.h"
> #include "regex/regex2.h"
> ! struct cclass
> ! {
> ! char *name;
> ! char *chars;
> ! char *multis;
> ! };
> ! static struct cclass* cclasses = NULL;
> ! static struct cclass* cclass_init(void);
> #include "regex/cname.h"
>
> /*
> ***************
> *** 174,179 ****
> --- 181,189 ----
> pg_wchar *wcp;
> #endif
>
> + if ( cclasses == NULL )
> + cclasses = cclass_init();
> +
> #ifdef REDEBUG
> #define GOODFLAGS(f) (f)
> #else
> ***************
> *** 884,890 ****
> struct cclass *cp;
> size_t len;
> char *u;
> ! char c;
>
> while (MORE() && pg_isalpha(PEEK()))
> NEXT();
> --- 894,900 ----
> struct cclass *cp;
> size_t len;
> char *u;
> ! unsigned char c;
>
> while (MORE() && pg_isalpha(PEEK()))
> NEXT();
> ***************
> *** 905,911 ****
>
> u = cp->chars;
> while ((c = *u++) != '\0')
> ! CHadd(cs, c);
> for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
> MCadd(p, cs, u);
> }
> --- 915,921 ----
>
> u = cp->chars;
> while ((c = *u++) != '\0')
> ! CHadd(cs, c);
> for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
> MCadd(p, cs, u);
> }
> ***************
> *** 1715,1718 ****
> --- 1725,1788 ----
> #else
> return (islower((unsigned char) c));
> #endif
> + }
> +
> + static struct cclass *
> + cclass_init(void)
> + {
> + struct cclass *cp = NULL;
> + struct cclass *classes = NULL;
> + struct cclass_factory
> + {
> + char *name;
> + int (*func)(int);
> + char *chars;
> + } cclass_factories [] =
> + {
> + { "alnum", isalnum, NULL },
> + { "alpha", isalpha, NULL },
> + { "blank", NULL, " \t" },
> + { "cntrl", iscntrl, NULL },
> + { "digit", NULL, "0123456789" },
> + { "graph", isgraph, NULL },
> + { "lower", islower, NULL },
> + { "print", isprint, NULL },
> + { "punct", ispunct, NULL },
> + { "space", NULL, "\t\n\v\f\r " },
> + { "upper", isupper, NULL },
> + { "xdigit", isxdigit, NULL },
> + { NULL, NULL, NULL }
> + };
> + struct cclass_factory *cf = NULL;
> +
> + classes = malloc(sizeof(struct cclass) * (sizeof(cclass_factories) / sizeof(struct cclass_factory)));
> + if (classes == NULL)
> + elog(ERROR,"cclass_init: out of memory");
> +
> + cp = classes;
> + for(cf = cclass_factories; cf->name != NULL; cf++)
> + {
> + cp->name = strdup(cf->name);
> + if ( cf->chars )
> + cp->chars = strdup(cf->chars);
> + else
> + {
> + int x = 0, y = 0;
> + cp->chars = malloc(sizeof(char) * 256);
> + if (cp->chars == NULL)
> + elog(ERROR,"cclass_init: out of memory");
> + for (x = 0; x < 256; x++)
> + {
> + if((cf->func)(x))
> + *(cp->chars + y++) = x;
> + }
> + *(cp->chars + y) = '\0';
> + }
> + cp->multis = "";
> + cp++;
> + }
> + cp->name = cp->chars = NULL;
> + cp->multis = "";
> +
> + return classes;
> }

>
> ---------------------------(end of broadcast)---------------------------
> TIP 6: Have you searched our list archives?
>
> http://archives.postgresql.org

--
Bruce Momjian | http://candle.pha.pa.us
pgman(at)candle(dot)pha(dot)pa(dot)us | (610) 853-3000
+ If your life is a hard drive, | 830 Blythe Avenue
+ Christ can be your backup. | Drexel Hill, Pennsylvania 19026

In response to

Browse pgsql-hackers by date

  From Date Subject
Next Message Doug McNaught 2002-04-17 21:55:14 Re: Index Scans become Seq Scans after VACUUM ANALYSE
Previous Message Bruce Momjian 2002-04-17 21:54:21 Re: Index Scans become Seq Scans after VACUUM ANALYSE