? setof_record.diff Index: doc/src/sgml/xfunc.sgml =================================================================== RCS file: /projects/cvsroot/pgsql/doc/src/sgml/xfunc.sgml,v retrieving revision 1.88 diff -u -r1.88 xfunc.sgml --- doc/src/sgml/xfunc.sgml 20 Sep 2004 22:48:25 -0000 1.88 +++ doc/src/sgml/xfunc.sgml 22 Oct 2004 04:24:53 -0000 @@ -556,6 +556,45 @@ This happens because listchildren returns an empty set for those arguments, so no result rows are generated. + + + One flexible and powerful thing functions can do is return sets of + rows whose type is unknown until runtime. For this purpose, return + SETOF RECORD. The following function returns all the + rows of a given table. + +CREATE FUNCTION from_any_table(TEXT) RETURNS SETOF RECORD AS $$ +DECLARE + r RECORD; +BEGIN + FOR r IN EXECUTE 'SELECT * FROM ' || $1 LOOP + RETURN NEXT r; + END LOOP; + RETURN; +END; +$$ LANGUAGE plpgsql; + + +As PostgreSQL does not know in advance the type of columns coming +back, you must tell it when you invoke the function. + + +SELECT * +FROM + from_any_table('foo') +AS + foo_tab(fooid INTEGER, foosubid INTEGER, fooname TEXT) +ORDER BY fooname +LIMIT 3; + + fooid | foosubid | fooname +-------+----------+--------- + 2 | 3 | Chimpy + 1 | 2 | Ed + 1 | 1 | Joe +(3 rows) + + Index: src/backend/regex/regexec.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/regex/regexec.c,v retrieving revision 1.24 diff -u -r1.24 regexec.c --- src/backend/regex/regexec.c 29 Nov 2003 19:51:55 -0000 1.24 +++ src/backend/regex/regexec.c 22 Oct 2004 04:24:53 -0000 @@ -110,6 +110,7 @@ regmatch_t *pmatch; rm_detail_t *details; chr *start; /* start of string */ + chr *search_start; /* search start of string */ chr *stop; /* just past end of string */ int err; /* error code if any (0 none) */ regoff_t *mem; /* memory vector for backtracking */ @@ -168,6 +169,7 @@ pg_regexec(regex_t *re, const chr *string, size_t len, + size_t search_start, rm_detail_t *details, size_t nmatch, regmatch_t pmatch[], @@ -219,6 +221,7 @@ v->pmatch = pmatch; v->details = details; v->start = (chr *) string; + v->search_start = (chr *) string + search_start; v->stop = (chr *) string + len; v->err = 0; if (backref) @@ -288,7 +291,8 @@ NOERR(); MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); cold = NULL; - close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL); + close = shortest(v, s, v->search_start, v->search_start, v->stop, + &cold, (int *) NULL); freedfa(s); NOERR(); if (v->g->cflags & REG_EXPECT) @@ -415,7 +419,7 @@ assert(d != NULL && s != NULL); cold = NULL; - close = v->start; + close = v->search_start; do { MDEBUG(("\ncsearch at %ld\n", LOFF(close))); Index: src/backend/utils/adt/regexp.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/regexp.c,v retrieving revision 1.54 diff -u -r1.54 regexp.c --- src/backend/utils/adt/regexp.c 29 Aug 2004 04:12:52 -0000 1.54 +++ src/backend/utils/adt/regexp.c 22 Oct 2004 04:24:54 -0000 @@ -81,37 +81,26 @@ /* - * RE_compile_and_execute - compile and execute a RE, caching if possible + * RE_compile_and_cache - compile a RE, caching if possible * - * Returns TRUE on match, FALSE on no match + * Returns regex_t * - * text_re --- the pattern, expressed as an *untoasted* TEXT object - * dat --- the data to match against (need not be null-terminated) - * dat_len --- the length of the data string - * cflags --- compile options for the pattern - * nmatch, pmatch --- optional return area for match details + * text_re --- the pattern, expressed as an *untoasted* TEXT object + * cflags --- compile options for the pattern * - * Both pattern and data are given in the database encoding. We internally - * convert to array of pg_wchar which is what Spencer's regex package wants. + * Pattern is given in the database encoding. We internally convert to + * array of pg_wchar which is what Spencer's regex package wants. */ -static bool -RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len, - int cflags, int nmatch, regmatch_t *pmatch) +static regex_t +RE_compile_and_cache(text *text_re, int cflags) { int text_re_len = VARSIZE(text_re); - pg_wchar *data; - size_t data_len; pg_wchar *pattern; size_t pattern_len; int i; int regcomp_result; - int regexec_result; cached_re_str re_temp; - /* Convert data string to wide characters */ - data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar)); - data_len = pg_mb2wchar_with_len(dat, data, dat_len); - /* * Look for a match among previously compiled REs. Since the data * structure is self-organizing with most-used entries at the front, @@ -133,18 +122,7 @@ re_array[0] = re_temp; } - /* Perform RE match and return result */ - regexec_result = pg_regexec(&re_array[0].cre_re, - data, - data_len, - NULL, /* no details */ - nmatch, - pmatch, - 0); - - pfree(data); - - return (regexec_result == 0); + return re_array[0].cre_re; } } @@ -211,10 +189,44 @@ re_array[0] = re_temp; num_res++; + return re_array[0].cre_re; +} + +/* + * RE_compile_and_execute - compile and execute a RE, caching if possible + * + * Returns TRUE on match, FALSE on no match + * + * text_re --- the pattern, expressed as an *untoasted* TEXT object + * dat --- the data to match against (need not be null-terminated) + * dat_len --- the length of the data string + * cflags --- compile options for the pattern + * nmatch, pmatch --- optional return area for match details + * + * Both pattern and data are given in the database encoding. We internally + * convert to array of pg_wchar which is what Spencer's regex package wants. + */ +static bool +RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len, + int cflags, int nmatch, regmatch_t *pmatch) +{ + pg_wchar *data; + size_t data_len; + int regexec_result; + regex_t re; + + /* Convert data string to wide characters */ + data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar)); + data_len = pg_mb2wchar_with_len(dat, data, dat_len); + + /* Compile RE */ + re = RE_compile_and_cache(text_re, cflags); + /* Perform RE match and return result */ regexec_result = pg_regexec(&re_array[0].cre_re, data, data_len, + 0, NULL, /* no details */ nmatch, pmatch, @@ -418,6 +430,26 @@ PG_RETURN_NULL(); } +/* + * textregexreplace() + * Return a replace string matched by a regular expression. + */ +Datum +textregexreplace(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_P(0); + text *p = PG_GETARG_TEXT_P(1); + text *r = PG_GETARG_TEXT_P(2); + regex_t re; + + re = RE_compile_and_cache(p, regex_flavor); + + return (DirectFunctionCall3(replace_text_regexp, + PointerGetDatum(s), + PointerGetDatum(&re), + PointerGetDatum(r))); +} + /* similar_escape() * Convert a SQL99 regexp pattern to POSIX style, so it can be used by * our regexp engine. Index: src/backend/utils/adt/varlena.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/varlena.c,v retrieving revision 1.117 diff -u -r1.117 varlena.c --- src/backend/utils/adt/varlena.c 29 Aug 2004 05:06:50 -0000 1.117 +++ src/backend/utils/adt/varlena.c 22 Oct 2004 04:24:54 -0000 @@ -28,6 +28,7 @@ #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" +#include "regex/regex.h" typedef struct varlena unknown; @@ -1992,6 +1993,151 @@ } /* + * have_escape_in_regexp_replace_str + * check replace string have escape char + */ +static bool +have_escape_in_regexp_replace_str(const char *replace_str) +{ + return (strchr(replace_str, '\\') != NULL); +} + +#define REGEXP_REPLACE_BACKREF_CNT 10 +/* + * appendStringInfoRegexpSubstr + * append string for regexp back references. + */ +static void +appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, + regmatch_t *pmatch, text *buf_text, int search_start) +{ + const char *pstart = PG_TEXT_GET_STR(replace_text); + const char *p = pstart; + const char *pnext; + + text *add_text; + int so; + int eo; + + for(;;) { + pnext = strchr(p, '\\'); + if(pnext == NULL) break; + + add_text = text_substring(PointerGetDatum(replace_text), + p - pstart + 1, pnext - p, false); + appendStringInfoString(str, PG_TEXT_GET_STR(add_text)); + pfree(add_text); + + p = pnext + 1; + so = eo = -1; + + if(*p >= '1' && *p <= '9') { + int idx = *p - '0'; + so = pmatch[idx].rm_so - search_start; + eo = pmatch[idx].rm_eo - search_start; + p++; + } else { + switch(*p) { + case '&': + so = pmatch[0].rm_so - search_start; + eo = pmatch[0].rm_eo - search_start; + p++; + break; + } + } + + if(so != -1 && eo != -1) { + add_text = text_substring(PointerGetDatum(buf_text), + so + 1, (eo - so), false); + appendStringInfoString(str, PG_TEXT_GET_STR(add_text)); + pfree(add_text); + } + } + + add_text = text_substring(PointerGetDatum(replace_text), + p - pstart + 1, -1, true); + appendStringInfoString(str, PG_TEXT_GET_STR(add_text)); + pfree(add_text); +} + +/* + * replace_text_regexp + * replace text using regexp +*/ +Datum +replace_text_regexp(PG_FUNCTION_ARGS) +{ + text *left_text; + text *right_text; + text *buf_text; + text *ret_text; + text *src_text = PG_GETARG_TEXT_P(0); + char *src_text_str = PG_TEXT_GET_STR(src_text); + int src_text_len = TEXTLEN(src_text); + regex_t *re = (regex_t *)PG_GETARG_POINTER(1); + text *replace_text = PG_GETARG_TEXT_P(2); + char *replace_str = PG_TEXT_GET_STR(replace_text); + StringInfo str = makeStringInfo(); + int regexec_result; + regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT]; + pg_wchar *data; + size_t data_len; + int search_start; + bool have_escape; + + buf_text = TEXTDUP(src_text); + + /* Convert data string to wide characters */ + data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar)); + data_len = pg_mb2wchar_with_len(src_text_str, data, strlen(src_text_str)); + + have_escape = have_escape_in_regexp_replace_str(replace_str); + + for(search_start = 0; search_start <= data_len;) { + regexec_result = pg_regexec(re, + data, + data_len, + search_start, + NULL, /* no details */ + REGEXP_REPLACE_BACKREF_CNT, + pmatch, + 0); + if(regexec_result != 0) break; + + left_text = text_substring(PointerGetDatum(buf_text), + 1, pmatch[0].rm_so - search_start, false); + right_text = text_substring(PointerGetDatum(buf_text), + pmatch[0].rm_eo - search_start + 1, + -1, true); + + appendStringInfoString(str, PG_TEXT_GET_STR(left_text)); + if(have_escape) { + appendStringInfoRegexpSubstr(str, replace_text, pmatch, + buf_text, search_start); + } else { + appendStringInfoString(str, replace_str); + } + + pfree(buf_text); + pfree(left_text); + buf_text = right_text; + + search_start = pmatch[0].rm_eo; + if(pmatch[0].rm_so == pmatch[0].rm_eo) search_start++; + } + + appendStringInfoString(str, PG_TEXT_GET_STR(buf_text)); + pfree(buf_text); + + ret_text = PG_STR_GET_TEXT(str->data); + pfree(str->data); + pfree(str); + pfree(data); + + PG_RETURN_TEXT_P(ret_text); +} + +/* * split_text * parse input string * return ord item (1 based) Index: src/include/regex/regex.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/regex/regex.h,v retrieving revision 1.26 diff -u -r1.26 regex.h --- src/include/regex/regex.h 29 Nov 2003 22:41:10 -0000 1.26 +++ src/include/regex/regex.h 22 Oct 2004 04:24:54 -0000 @@ -163,7 +163,7 @@ * the prototypes for exported functions */ extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int); -extern int pg_regexec(regex_t *, const pg_wchar *, size_t, rm_detail_t *, size_t, regmatch_t[], int); +extern int pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int); extern void pg_regfree(regex_t *); extern size_t pg_regerror(int, const regex_t *, char *, size_t); Index: src/include/utils/builtins.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/utils/builtins.h,v retrieving revision 1.251 diff -u -r1.251 builtins.h --- src/include/utils/builtins.h 4 Oct 2004 22:49:59 -0000 1.251 +++ src/include/utils/builtins.h 22 Oct 2004 04:24:54 -0000 @@ -425,6 +425,7 @@ extern Datum texticregexeq(PG_FUNCTION_ARGS); extern Datum texticregexne(PG_FUNCTION_ARGS); extern Datum textregexsubstr(PG_FUNCTION_ARGS); +extern Datum textregexreplace(PG_FUNCTION_ARGS); extern Datum similar_escape(PG_FUNCTION_ARGS); /* regproc.c */ @@ -560,6 +561,7 @@ extern bool SplitIdentifierString(char *rawstring, char separator, List **namelist); extern Datum replace_text(PG_FUNCTION_ARGS); +extern Datum replace_text_regexp(PG_FUNCTION_ARGS); extern Datum split_text(PG_FUNCTION_ARGS); extern Datum text_to_array(PG_FUNCTION_ARGS); extern Datum array_to_text(PG_FUNCTION_ARGS);