Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.361
diff -c -r1.361 func.sgml
*** doc/src/sgml/func.sgml	16 Feb 2007 07:46:54 -0000	1.361
--- doc/src/sgml/func.sgml	18 Feb 2007 05:54:14 -0000
***************
*** 1446,1451 ****
--- 1446,1464 ----
        </row>
  
        <row>
+        <entry><literal><function>regexp_matches</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [,<parameter>flags</parameter> <type>text</type>])</literal></entry>
+        <entry><type>setof text[]</type></entry>
+        <entry>
+         Return all capture groups resulting from matching a POSIX regular
+         expression against the <parameter>string</parameter>. See
+         <xref linkend="functions-matching"> for more information on pattern
+         matching.
+        </entry>
+        <entry><literal>regexp_matches('foobarbequebaz', '(bar)(beque)')</literal></entry>
+        <entry><literal>{bar,beque}</literal></entry>
+       </row>
+ 
+       <row>
         <entry><literal><function>regexp_replace</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type> [,<parameter>flags</parameter> <type>text</type>])</literal></entry>
         <entry><type>text</type></entry>
         <entry>
***************
*** 1458,1463 ****
--- 1471,1500 ----
        </row>
  
        <row>
+        <entry><literal><function>regexp_split</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [,<parameter>flags</parameter> <type>text</type>])</literal></entry>
+        <entry><type>setof text</type></entry>
+        <entry>
+         Splits <parameter>string</parameter> using a POSIX regular expression as
+         the delimiter.  See <xref linkend="functions-matching"> for more
+         information on pattern matching.
+        </entry>
+        <entry><literal>regexp_split('hello world', E'\\s+')</literal></entry>
+        <entry><literal>hello</literal><para><literal>world</literal></para> (2 rows)</entry>
+       </row>
+ 
+       <row>
+        <entry><literal><function>regexp_split_array</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [,<parameter>flags</parameter> <type>text</type> [,<parameter>limit</parameter> <type>int</type>]])</literal></entry>
+        <entry><type>text[]</type></entry>
+        <entry>
+         Splits <parameter>string</parameter> using a POSIX regular expression as
+         the delimiter.  See <xref linkend="functions-matching"> for more
+         information on pattern matching.
+        </entry>
+        <entry><literal>regexp_split_array('hello world', E'\\s+')</literal></entry>
+        <entry><literal>{hello,world}</literal></entry>
+       </row>
+ 
+       <row>
         <entry><literal><function>repeat</function>(<parameter>string</parameter> <type>text</type>, <parameter>number</parameter> <type>int</type>)</literal></entry>
         <entry><type>text</type></entry>
         <entry>Repeat <parameter>string</parameter> the specified
***************
*** 2861,2869 ****
     <indexterm>
      <primary>substring</primary>
     </indexterm>
-    <indexterm>
-     <primary>regexp_replace</primary>
-    </indexterm>
  
  <synopsis>
  <replaceable>string</replaceable> SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional>
--- 2898,2903 ----
***************
*** 2982,2987 ****
--- 3016,3036 ----
      <primary>regular expression</primary>
      <seealso>pattern matching</seealso>
     </indexterm>
+    <indexterm>
+     <primary>substring</primary>
+    </indexterm>
+    <indexterm>
+     <primary>regexp_replace</primary>
+    </indexterm>
+    <indexterm>
+     <primary>regexp_matches</primary>
+    </indexterm>
+    <indexterm>
+     <primary>regexp_split</primary>
+    </indexterm>
+    <indexterm>
+     <primary>regexp_split_array</primary>
+    </indexterm>
  
     <para>
      <xref linkend="functions-posix-table"> lists the available
***************
*** 3112,3118 ****
       string containing zero or more single-letter flags that change the
       function's behavior.  Flag <literal>i</> specifies case-insensitive
       matching, while flag <literal>g</> specifies replacement of each matching
!      substring rather than only the first one.
      </para>
  
     <para>
--- 3161,3170 ----
       string containing zero or more single-letter flags that change the
       function's behavior.  Flag <literal>i</> specifies case-insensitive
       matching, while flag <literal>g</> specifies replacement of each matching
!      substring rather than only the first one.  Other supported flags are
!      <literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and
!      <literal>x</>, whose meanings correspond to those shown in
!      <xref linkend="posix-embedded-options-table">.
      </para>
  
     <para>
***************
*** 3127,3132 ****
--- 3179,3332 ----
  </programlisting>
     </para>
  
+     <para>
+      The <function>regexp_matches</> function returns all of the capture
+      groups resulting from matching POSIX regular expression patterns.
+      It has the syntax
+      <function>regexp_matches</function>(<replaceable>string</>, <replaceable>pattern</>
+      <optional>, <replaceable>flags</> </optional>).
+      If there is no match to the <replaceable>pattern</>, the function returns no rows.
+      If there is a match, the function returns the contents of all of the capture groups
+      in a text array, or if there were no capture groups in the pattern, it returns the
+      contents of the entire match as a single-element text array.
+      The <replaceable>flags</> parameter is an optional text
+      string containing zero or more single-letter flags that change the
+      function's behavior.  Flag <literal>i</> specifies case-insensitive
+      matching, while flag <literal>g</> causes the return of each matching
+      substring rather than only the first one.  Other supported
+      flags are <literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and
+      <literal>x</>, whose meanings correspond to those shown in
+      <xref linkend="posix-embedded-options-table">.
+     </para>
+ 
+    <para>
+     Some examples:
+ <programlisting>
+ SELECT regexp_matches('foobarbequebaz', '(bar)(beque)');
+  regexp_matches 
+ ----------------
+  {bar,beque}
+ (1 row)
+ 
+ SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g');
+  regexp_matches 
+ ----------------
+  {bar,beque}
+  {bazil,barf}
+ (2 rows)
+ 
+ SELECT regexp_matches('foobarbequebaz', 'barbeque');
+  regexp_matches 
+ ----------------
+  {barbeque}
+ (1 row)
+ 
+ </programlisting>
+    </para>
+ 
+     <para>
+      The <function>regexp_split</> function splits a string using a POSIX
+      regular expression pattern as a delimiter.  It has the syntax
+      <function>regexp_split</function>(<replaceable>string</>, <replaceable>pattern</>
+      <optional>, <replaceable>flags</> </optional>).
+      If there is no match to the <replaceable>pattern</>, the function returns the
+      <replaceable>string</>.  If there is at least one match, for each match it returns
+      the text from the end of the last match (or the beginning of the string)
+      to the beginning of the match.  When there are no more matches, it
+      returns the text from the end of the last match to the end of the string.
+      The <replaceable>flags</> parameter is an optional text string containing
+      zero or more single-letter flags that change the function's behavior.
+      <function>regexp_split</function> supports the flags <literal>i</>,
+      <literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and
+      <literal>x</>, whose meanings are described in
+      <xref linkend="posix-embedded-options-table">.
+     </para>
+ 
+     <para>
+      The <function>regexp_split_array</> function operates the same as
+      <function>regexp_split</>, except that <function>regexp_split_array</>
+      returns its results in a text[].  It has the syntax
+      <function>regexp_split_array</function>(<replaceable>string</>, <replaceable>pattern</>
+      <optional>, <replaceable>flags</> <optional>, <replaceable>limit</> </optional> </optional>).
+      The parameters are the same as for <function>regexp_split</>, except for the <replaceable>limit</>
+      parameter, which, if specified, gives the maximum number of items to return in the <type>text[]</>.
+     </para>
+ 
+ 
+    <para>
+     Some examples:
+ <programlisting>
+ 
+ SELECT foo FROM regexp_split('the quick brown fox jumped over the lazy dog', E'\\s+') AS foo;
+   foo   
+ --------
+  the    
+  quick  
+  brown  
+  fox    
+  jumped 
+  over   
+  the    
+  lazy   
+  dog    
+ (9 rows)
+ 
+ SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', E'\\s+');
+                regexp_split_array               
+ ------------------------------------------------
+  {the,quick,brown,fox,jumped,over,the,lazy,dog}
+ (1 row)
+ 
+ SELECT foo FROM regexp_split('the quick brown fox jumped over the lazy dog', E'\\s*') AS foo;
+  foo 
+ -----
+  t         
+  h         
+  e         
+  q         
+  u         
+  i         
+  c         
+  k         
+  b         
+  r         
+  o         
+  w         
+  n         
+  f         
+  o         
+  x         
+  j         
+  u         
+  m         
+  p         
+  e         
+  d         
+  o         
+  v         
+  e         
+  r         
+  t         
+  h         
+  e         
+  l         
+  a         
+  z         
+  y         
+  d         
+  o         
+  g         
+ (36 rows)
+ 
+ SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', E'\\s*', '', 8);
+  regexp_split_array 
+ --------------------
+  {t,h,e,q,u,i,c,k}
+ (1 row)
+ 
+ </programlisting>
+    </para>
+ 
     <para>
      <productname>PostgreSQL</productname>'s regular expressions are implemented
      using a package written by Henry Spencer.  Much of
Index: src/backend/utils/adt/regexp.c
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/src/backend/utils/adt/regexp.c,v
retrieving revision 1.68
diff -c -r1.68 regexp.c
*** src/backend/utils/adt/regexp.c	5 Jan 2007 22:19:41 -0000	1.68
--- src/backend/utils/adt/regexp.c	18 Feb 2007 06:02:29 -0000
***************
*** 29,36 ****
--- 29,39 ----
   */
  #include "postgres.h"
  
+ #include "funcapi.h"
+ #include "access/heapam.h"
  #include "regex/regex.h"
  #include "utils/builtins.h"
+ #include "utils/lsyscache.h"
  #include "utils/guc.h"
  
  
***************
*** 75,83 ****
--- 78,133 ----
  	regex_t		cre_re;			/* the compiled regular expression */
  } cached_re_str;
  
+ typedef struct re_comp_flags	/* result of parsing a regexp flags string */
+ {
+ 	int			  cflags;		/* regex compile flags: regex_flavor plus REG_* bits */
+ 	bool		  glob;			/* set by the 'g' flag */
+ } re_comp_flags;
+ 
+ typedef struct regexp_matches_ctx	/* state regexp_matches keeps between calls */
+ {
+ 	text		 *orig_str;		/* the input string */
+ 	size_t		  orig_len;		/* its data length: VARSIZE minus VARHDRSZ */
+ 	pg_wchar	 *wide_str;		/* orig_str converted to pg_wchar */
+ 	size_t		  wide_len;		/* length of wide_str, as returned by the conversion */
+ 	regex_t		 *cpattern;		/* compiled pattern */
+ 	regmatch_t	 *pmatch;		/* match results, re_nsub + 1 entries */
+ 	size_t		  offset;		/* where in wide_str the next search starts */
+ 
+ 	re_comp_flags flags;		/* parsed flags argument */
+ 
+ 	/* element type info handed to construct_md_array */
+ 	Oid			  param_type;	/* type of argument 0 */
+ 	int16		  typlen;
+ 	bool		  typbyval;
+ 	char		  typalign;
+ } regexp_matches_ctx;
+ 
+ typedef struct regexp_split_ctx	/* state of an in-progress split */
+ {
+ 	text		 *orig_str;		/* the input string */
+ 	size_t		  orig_len;		/* its data length: VARSIZE minus VARHDRSZ */
+ 	pg_wchar	 *wide_str;		/* orig_str converted to pg_wchar */
+ 	size_t		  wide_len;		/* length of wide_str, as returned by the conversion */
+ 	regex_t		 *cpattern;		/* compiled pattern */
+ 	regmatch_t	  match;		/* most recent match; initialized with rm_so != rm_eo */
+ 	size_t		  offset;		/* where in wide_str the next search starts */
+ 	re_comp_flags flags;		/* parsed flags argument */
+ } regexp_split_ctx;
+ 
+ 
  static int	num_res = 0;		/* # of cached re's */
  static cached_re_str re_array[MAX_CACHED_RES];	/* cached re's */
  
+ static regexp_matches_ctx *setup_regexp_matches(FunctionCallInfo fcinfo,
+ 												text *orig_str, text *pattern,
+ 												text *flags);
+ static ArrayType *perform_regexp_matches(regexp_matches_ctx *matchctx);
+ 
+ static regexp_split_ctx *setup_regexp_split(text *str, text *pattern,
+ 											text *flags);
+ static Datum get_next_split(regexp_split_ctx *splitctx);
+ 
  
  /*
   * RE_compile_and_cache - compile a RE, caching if possible
***************
*** 88,94 ****
   *	cflags --- compile options for the pattern
   *
   * Pattern is given in the database encoding.  We internally convert to
!  * array of pg_wchar which is what Spencer's regex package wants.
   */
  static regex_t *
  RE_compile_and_cache(text *text_re, int cflags)
--- 138,144 ----
   *	cflags --- compile options for the pattern
   *
   * Pattern is given in the database encoding.  We internally convert to
!  * an array of pg_wchar, which is what Spencer's regex package wants.
   */
  static regex_t *
  RE_compile_and_cache(text *text_re, int cflags)
***************
*** 191,238 ****
  }
  
  /*
!  * RE_compile_and_execute - compile and execute a RE
   *
   * Returns TRUE on match, FALSE on no match
   *
!  *	text_re --- the pattern, expressed as an *untoasted* TEXT object
!  *	dat --- the data to match against (need not be null-terminated)
!  *	dat_len --- the length of the data string
!  *	cflags --- compile options for the pattern
   *	nmatch, pmatch	--- optional return area for match details
   *
!  * Both pattern and data are given in the database encoding.  We internally
!  * convert to array of pg_wchar which is what Spencer's regex package wants.
   */
  static bool
! RE_compile_and_execute(text *text_re, char *dat, int dat_len,
! 					   int cflags, int nmatch, regmatch_t *pmatch)
  {
- 	pg_wchar   *data;
- 	size_t		data_len;
  	int			regexec_result;
- 	regex_t    *re;
  	char		errMsg[100];
  
- 	/* Convert data string to wide characters */
- 	data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
- 	data_len = pg_mb2wchar_with_len(dat, data, dat_len);
- 
- 	/* Compile RE */
- 	re = RE_compile_and_cache(text_re, cflags);
- 
  	/* Perform RE match and return result */
  	regexec_result = pg_regexec(re,
  								data,
  								data_len,
! 								0,
  								NULL,	/* no details */
  								nmatch,
  								pmatch,
  								0);
  
- 	pfree(data);
- 
  	if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
  	{
  		/* re failed??? */
--- 241,276 ----
  }
  
  /*
!  * RE_wchar_execute - execute a RE
   *
   * Returns TRUE on match, FALSE on no match
   *
!  *	re --- the compiled pattern as returned by RE_compile_and_cache
!  *	data --- the data to match against (need not be null-terminated)
!  *	data_len --- the length of the data string
!  *	start_search -- the offset in the data to start searching
   *	nmatch, pmatch	--- optional return area for match details
   *
!  * Data is given as array of pg_wchar which is what Spencer's regex package
!  * wants.
   */
  static bool
! RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len, size_t start_search,
! 				 int nmatch, regmatch_t *pmatch)
  {
  	int			regexec_result;
  	char		errMsg[100];
  
  	/* Perform RE match and return result */
  	regexec_result = pg_regexec(re,
  								data,
  								data_len,
! 								start_search,
  								NULL,	/* no details */
  								nmatch,
  								pmatch,
  								0);
  
  	if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
  	{
  		/* re failed??? */
***************
*** 245,250 ****
--- 283,392 ----
  	return (regexec_result == REG_OKAY);
  }
  
+ /*
+  * RE_execute - execute an already-compiled RE
+  *
+  * Returns TRUE on match, FALSE on no match
+  *
+  *	re --- the compiled pattern as returned by RE_compile_and_cache
+  *	dat --- the data to match against (need not be null-terminated)
+  *	dat_len --- the length of the data string
+  *	nmatch, pmatch	--- optional return area for match details
+  *
+  * Data is given in the database encoding.  We internally convert it to an
+  * array of pg_wchar, which is what Spencer's regex package wants.
+  */
+ static bool
+ RE_execute(regex_t *re, char *dat, int dat_len,
+ 		   int nmatch, regmatch_t *pmatch)
+ {
+ 	pg_wchar   *data;
+ 	size_t		data_len;
+ 	bool		match;
+ 
+ 	/* Convert data string to wide characters (dat_len + 1 slots allocated) */
+ 	data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
+ 	data_len = pg_mb2wchar_with_len(dat, data, dat_len);
+ 
+ 	/* Perform RE match from offset 0, free the wide copy, return result */
+ 	match = RE_wchar_execute(re, data, data_len, 0, nmatch, pmatch);
+ 	pfree(data);
+ 	return match;
+ }
+ 
+ /*
+  * RE_compile_and_execute - compile and execute a RE
+  *
+  * Returns TRUE on match, FALSE on no match
+  *
+  *	text_re --- the pattern, expressed as an *untoasted* TEXT object
+  *	dat --- the data to match against (need not be null-terminated)
+  *	dat_len --- the length of the data string
+  *	cflags --- compile options for the pattern
+  *	nmatch, pmatch	--- optional return area for match details
+  *
+  * Both pattern and data are given in the database encoding.  We internally
+  * convert to an array of pg_wchar, which is what Spencer's regex package wants.
+  */
+ static bool
+ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
+ 					   int cflags, int nmatch, regmatch_t *pmatch)
+ {
+ 	regex_t    *re;
+ 
+ 	/* Compile RE (RE_compile_and_cache caches it if possible) */
+ 	re = RE_compile_and_cache(text_re, cflags);
+ 
+ 	return RE_execute(re, dat, dat_len, nmatch, pmatch);	/* convert data and match */
+ }
+ 
+ static void
+ parse_re_comp_flags(re_comp_flags *flags, text *opts)
+ {								/* decode the option letters in opts into *flags */
+ 	MemSet(flags, 0, sizeof(re_comp_flags));	/* glob starts out false */
+ 	flags->cflags = regex_flavor;	/* start from the current regex_flavor */
+ 
+ 	if (opts)					/* a NULL opts leaves just the defaults above */
+ 	{
+ 		char  *opt_p = VARDATA(opts);
+ 		size_t opt_len = VARSIZE(opts) - VARHDRSZ;
+ 		int i;
+ 
+ 		for (i = 0; i < opt_len; i++)
+ 		{
+ 			switch (opt_p[i])
+ 			{
+ 				case 'g':		/* recorded separately; not a compile flag */
+ 					flags->glob = true;
+ 					break;
+ 				case 'i':
+ 					flags->cflags |= REG_ICASE;
+ 					break;
+ 				case 'm':		/* handled exactly like 'n' */
+ 				case 'n':
+ 					flags->cflags |= REG_NEWLINE;
+ 					break;
+ 				case 'p':		/* REG_NLSTOP on, REG_NLANCH off */
+ 					flags->cflags |= REG_NLSTOP;
+ 					flags->cflags &= ~REG_NLANCH;
+ 					break;
+ 				case 'w':		/* the reverse of 'p' */
+ 					flags->cflags &= ~REG_NLSTOP;
+ 					flags->cflags |= REG_NLANCH;
+ 					break;
+ 				case 'x':
+ 					flags->cflags |= REG_EXPANDED;
+ 					break;
+ 				default:		/* any other letter is rejected */
+ 					ereport(ERROR,
+ 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 							 errmsg("invalid regexp option: %c", opt_p[i])));
+ 					break;
+ 			}
+ 		}
+ 	}
+ }
+ 
  
  /*
   * assign_regex_flavor - GUC hook to validate and set REGEX_FLAVOR
***************
*** 469,507 ****
  	text	   *p = PG_GETARG_TEXT_P(1);
  	text	   *r = PG_GETARG_TEXT_P(2);
  	text	   *opt = PG_GETARG_TEXT_P(3);
- 	char	   *opt_p = VARDATA(opt);
- 	int			opt_len = (VARSIZE(opt) - VARHDRSZ);
- 	int			i;
- 	bool		glob = false;
- 	bool		ignorecase = false;
  	regex_t    *re;
  
! 	/* parse options */
! 	for (i = 0; i < opt_len; i++)
! 	{
! 		switch (opt_p[i])
! 		{
! 			case 'i':
! 				ignorecase = true;
! 				break;
! 			case 'g':
! 				glob = true;
! 				break;
! 			default:
! 				ereport(ERROR,
! 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! 						 errmsg("invalid option of regexp_replace: %c",
! 								opt_p[i])));
! 				break;
! 		}
! 	}
  
! 	if (ignorecase)
! 		re = RE_compile_and_cache(p, regex_flavor | REG_ICASE);
! 	else
! 		re = RE_compile_and_cache(p, regex_flavor);
  
! 	PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, glob));
  }
  
  /* similar_escape()
--- 611,624 ----
  	text	   *p = PG_GETARG_TEXT_P(1);
  	text	   *r = PG_GETARG_TEXT_P(2);
  	text	   *opt = PG_GETARG_TEXT_P(3);
  	regex_t    *re;
+ 	re_comp_flags flags;
  
! 	parse_re_comp_flags(&flags, opt);
  
! 	re = RE_compile_and_cache(p, flags.cflags);
  
! 	PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, flags.glob));
  }
  
  /* similar_escape()
***************
*** 625,630 ****
--- 742,1102 ----
  	PG_RETURN_TEXT_P(result);
  }
  
+ #define PG_GETARG_TEXT_P_IF_EXISTS(_n) \
+ 	(PG_NARGS() > (_n) ? PG_GETARG_TEXT_P(_n) : NULL)	/* NULL if arg _n was not passed */
+ 
+ Datum
+ regexp_matches(PG_FUNCTION_ARGS)
+ {								/* SRF: returns one text[] per match found */
+ 	FuncCallContext		*funcctx;
+ 	MemoryContext		 oldcontext;
+ 	regexp_matches_ctx	*matchctx;
+ 
+ 	if (SRF_IS_FIRSTCALL())
+ 	{
+ 		text *pattern = PG_GETARG_TEXT_P(1);
+ 		text *flags   = PG_GETARG_TEXT_P_IF_EXISTS(2);	/* NULL in the 2-arg form */
+ 
+ 		funcctx = SRF_FIRSTCALL_INIT();
+ 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ 
+ 		/* be sure to copy the input string into the multi-call ctx */
+ 		matchctx = setup_regexp_matches(fcinfo, PG_GETARG_TEXT_P_COPY(0),
+ 										pattern, flags);
+ 
+ 		MemoryContextSwitchTo(oldcontext);
+ 		funcctx->user_fctx = (void *) matchctx;
+ 
+ 		/* Avoid run-away function by making sure we never iterate more than
+ 		 * the length of the text + 1 (the number of matches an empty pattern
+ 		 * will make is length + 1).  max_calls == 0 marks a non-global match.
+ 		 */
+ 		if (matchctx->flags.glob)
+ 			funcctx->max_calls = matchctx->wide_len + 1;
+ 		else
+ 			funcctx->max_calls = 0;
+ 	}
+ 
+ 	funcctx = SRF_PERCALL_SETUP();
+ 	matchctx = (regexp_matches_ctx *) funcctx->user_fctx;
+ 
+ 	if (funcctx->call_cntr > funcctx->max_calls)
+ 	{
+ 		/* if max_calls == 0, then we are doing a non-global match, we should
+ 		 * stop now, no problem.  Otherwise, if we exceed max_calls something
+ 		 * really wonky is going on, since it is returning more matches than
+ 		 * there are characters in the string, which should not happen
+ 		 */
+ 		if (funcctx->max_calls != 0)
+ 			elog(ERROR, "set returning match function terminated after iterating %d times", funcctx->call_cntr);
+ 		SRF_RETURN_DONE(funcctx);
+ 	}
+ 
+ 	if (matchctx->offset < matchctx->wide_len)	/* NOTE(review): never true for an empty input string */
+ 	{
+ 		ArrayType *result_ary;
+ 
+ 		if (matchctx->pmatch[0].rm_so == matchctx->pmatch[0].rm_eo)	/* previous match was empty */
+ 			matchctx->offset++;	/* step forward so it is not found again */
+ 
+ 		if ((result_ary = perform_regexp_matches(matchctx)) != NULL)
+ 		{
+ 			matchctx->offset = matchctx->pmatch[0].rm_eo;	/* resume after this match */
+ 			SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
+ 		}
+ 		/* else fall through and return done */
+ 	}
+ 
+ 	SRF_RETURN_DONE (funcctx);
+ }
+ 
+ Datum
+ regexp_matches_noopts(PG_FUNCTION_ARGS)
+ {								/* 2-arg entry point; regexp_matches sees the absent flags as NULL */
+ 	return regexp_matches(fcinfo);
+ }
+ 
+ static regexp_matches_ctx *
+ setup_regexp_matches(FunctionCallInfo fcinfo, text *orig_str, text *pattern, text *flags)
+ {								/* build the state regexp_matches keeps between calls */
+ 	regexp_matches_ctx	*matchctx = palloc(sizeof(regexp_matches_ctx));	/* in the current memory context */
+ 
+ 	matchctx->orig_str = orig_str;
+ 	matchctx->orig_len = VARSIZE(matchctx->orig_str) - VARHDRSZ;
+ 
+ 	parse_re_comp_flags(&matchctx->flags, flags);	/* flags may be NULL */
+ 
+ 	matchctx->cpattern = RE_compile_and_cache(pattern, matchctx->flags.cflags);
+ 	matchctx->pmatch = palloc(sizeof(regmatch_t) * (matchctx->cpattern->re_nsub + 1));	/* whole match + each group */
+ 	matchctx->offset = 0;
+ 
+ 	/* use argument 0's type as the array element type; too lazy to do it some other way */
+ 	matchctx->param_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ 	get_typlenbyvalalign(matchctx->param_type, &matchctx->typlen,
+ 						 &matchctx->typbyval, &matchctx->typalign);
+ 
+ 	matchctx->wide_str = (pg_wchar *) palloc((matchctx->orig_len + 1) * sizeof(pg_wchar));
+ 	matchctx->wide_len = pg_mb2wchar_with_len(VARDATA(matchctx->orig_str),
+ 											  matchctx->wide_str, matchctx->orig_len);
+ 
+ 	matchctx->pmatch[0].rm_so = -1;
+ 	/* both < 0 but not equal, so the first call sees no empty previous match */
+ 	matchctx->pmatch[0].rm_eo = -2;
+ 
+ 	return matchctx;
+ }
+ 
+ static ArrayType *
+ perform_regexp_matches(regexp_matches_ctx *matchctx)
+ {								/* find the next match; a NULL result means no match */
+ 	if (RE_wchar_execute(matchctx->cpattern,
+ 						 matchctx->wide_str,
+ 						 matchctx->wide_len,
+ 						 matchctx->offset,
+ 						 matchctx->cpattern->re_nsub + 1,
+ 						 matchctx->pmatch))
+ 	{
+ 		Datum *elems;
+ 		bool *nulls;
+ 		/* following only used when there are no capture groups,
+ 		 * to avoid pallocing one datum
+ 		 */
+ 		Datum fullmatch;
+ 		int ndims = 1;
+ 		int dims[1];
+ 		int lbs[1] = {1};
+ 
+ 		if (matchctx->cpattern->re_nsub > 0)	/* pattern has capture groups */
+ 		{
+ 			int i;
+ 
+ 			elems = palloc(matchctx->cpattern->re_nsub * sizeof(Datum));
+ 			nulls = palloc(matchctx->cpattern->re_nsub * sizeof(bool));
+ 			dims[0] = matchctx->cpattern->re_nsub;	/* one array element per group */
+ 
+ 			for (i = 0; i < matchctx->cpattern->re_nsub; i++)
+ 			{
+ 				int so = matchctx->pmatch[i+1].rm_so;	/* pmatch[0] is not used in this branch */
+ 				int	eo = matchctx->pmatch[i+1].rm_eo;
+ 
+ 				if (so < 0 || eo < 0)	/* no offsets for this group: NULL element */
+ 				{
+ 					elems[i] = 0;
+ 					nulls[i] = true;
+ 				}
+ 				else
+ 				{
+ 					elems[i] = DirectFunctionCall3(text_substr,
+ 							PointerGetDatum(matchctx->orig_str),
+ 							Int32GetDatum(so + 1),
+ 							Int32GetDatum(eo - so));
+ 					nulls[i] = false;
+ 				}
+ 			}
+ 		}
+ 		else					/* no groups: single element holding the whole match */
+ 		{
+ 			int so = matchctx->pmatch[0].rm_so;
+ 			int	eo = matchctx->pmatch[0].rm_eo;
+ 
+ 			if (so < 0 || eo < 0)
+ 				elog(ERROR, "regexp code said it had a match, but did not return it");
+ 
+ 			fullmatch = DirectFunctionCall3(text_substr,
+ 					PointerGetDatum(matchctx->orig_str),
+ 					Int32GetDatum(so + 1),
+ 					Int32GetDatum(eo - so));
+ 
+ 			elems = &fullmatch;
+ 			nulls = NULL;
+ 			dims[0] = 1;
+ 		}
+ 
+ 		return construct_md_array(elems, nulls, ndims, dims, lbs,
+ 								  matchctx->param_type, matchctx->typlen,
+ 								  matchctx->typbyval, matchctx->typalign);
+ 	}
+ 	else
+ 		return NULL;
+ }
+ 
+ Datum
+ regexp_split(PG_FUNCTION_ARGS)
+ {								/* SRF: returns the pieces of the string, one per row */
+ 	FuncCallContext  *funcctx;
+ 	regexp_split_ctx *splitctx;
+ 
+ 	if (SRF_IS_FIRSTCALL())
+ 	{
+ 		text 			*pattern  = PG_GETARG_TEXT_P(1);
+ 		text 			*flags = PG_GETARG_TEXT_P_IF_EXISTS(2);	/* NULL in the 2-arg form */
+ 		MemoryContext    oldcontext;
+ 
+ 		funcctx = SRF_FIRSTCALL_INIT();
+ 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ 
+ 		splitctx = setup_regexp_split(PG_GETARG_TEXT_P_COPY(0), pattern, flags);	/* built in the multi-call ctx */
+ 
+ 		MemoryContextSwitchTo(oldcontext);
+ 		funcctx->user_fctx = (void *) splitctx;
+ 
+ 		/* Avoid run-away function by making sure we never iterate more than
+ 		 * the length of the text
+ 		 */
+ 		funcctx->max_calls = splitctx->wide_len;
+ 	}
+ 
+ 	funcctx = SRF_PERCALL_SETUP();
+ 	splitctx = (regexp_split_ctx *) funcctx->user_fctx;
+ 
+ 	/* if we exceed max_calls something really wonky is going on, since it is
+ 	 * returning more matches than there are characters in the string, which
+ 	 * should not happen
+ 	 */
+ 	if (funcctx->call_cntr > funcctx->max_calls)
+ 		elog(ERROR, "set returning split function terminated after iterating %d times", funcctx->call_cntr);
+ 
+ 	if (splitctx->offset < splitctx->wide_len)	/* NOTE(review): an empty input string yields no rows */
+ 		SRF_RETURN_NEXT(funcctx, get_next_split(splitctx));
+ 	else
+ 		SRF_RETURN_DONE(funcctx);
+ }
+ 
+ static regexp_split_ctx *
+ setup_regexp_split(text *str, text *pattern, text *flags)
+ {								/* build the state used by get_next_split */
+ 	regexp_split_ctx *splitctx = palloc(sizeof(regexp_split_ctx));	/* in the current memory context */
+ 
+ 	splitctx->orig_str = str;
+ 	splitctx->orig_len = VARSIZE(splitctx->orig_str) - VARHDRSZ;
+ 
+ 	parse_re_comp_flags(&splitctx->flags, flags);	/* flags may be NULL */
+ 	if (splitctx->flags.glob)	/* 'g' parses fine but is rejected for splitting */
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 				 errmsg("regexp_split does not support the global option")));
+ 
+ 	splitctx->cpattern = RE_compile_and_cache(pattern, splitctx->flags.cflags);
+ 
+ 	splitctx->wide_str = (pg_wchar *) palloc((splitctx->orig_len + 1) * sizeof(pg_wchar));
+ 	splitctx->wide_len = pg_mb2wchar_with_len(VARDATA(splitctx->orig_str),
+ 			splitctx->wide_str, splitctx->orig_len);
+ 
+ 	splitctx->offset = 0;
+ 
+ 	splitctx->match.rm_so = -1;
+ 	/* both < 0 but not equal, so the first search sees no empty previous match */
+ 	splitctx->match.rm_eo = -2;
+ 
+ 	return splitctx;
+ }
+ 
+ /* get_next_split - return the next field of the string being split.  Callers
+  * in this file invoke it only while splitctx->offset < splitctx->wide_len. */
+ static Datum
+ get_next_split(regexp_split_ctx *splitctx)
+ {
+ 	regmatch_t *pmatch = &(splitctx->match);
+ 
+ 	for (;;)
+ 	{
+ 		Datum result;
+ 		int	  startpos = splitctx->offset + 1;	/* substr positions start at 1 */
+ 
+ 		/* if the last match was zero-length, we need to push the offset
+ 		 * forward to avoid matching the same place forever
+ 		 */
+ 		if (pmatch->rm_so == pmatch->rm_eo)
+ 			splitctx->offset++;
+ 
+ 		if (RE_wchar_execute(splitctx->cpattern,
+ 					splitctx->wide_str,
+ 					splitctx->wide_len,
+ 					splitctx->offset,
+ 					1,
+ 					pmatch))
+ 		{
+ 			int length = splitctx->match.rm_so - startpos + 1;
+ 
+ 			/* A zero-length match right where this field starts splits off
+ 			 * nothing: go around again, which bumps the offset (see above).
+ 			 * A non-empty match there (a leading or adjacent delimiter) must
+ 			 * not loop, because the offset would never advance; instead fall
+ 			 * through and return an empty field, consuming the delimiter.
+ 			 */
+ 			if (length == 0 && pmatch->rm_so == pmatch->rm_eo)
+ 				continue;
+ 
+ 			result = DirectFunctionCall3(text_substr,
+ 					PointerGetDatum(splitctx->orig_str),
+ 					Int32GetDatum(startpos),
+ 					Int32GetDatum(length));
+ 
+ 			/* set the offset to the end of this match for next time */
+ 			splitctx->offset = pmatch->rm_eo;
+ 
+ 			return result;
+ 		}
+ 		else
+ 		{
+ 			/* no more matches, return rest of string */
+ 			result = DirectFunctionCall2(text_substr_no_len,
+ 					PointerGetDatum(splitctx->orig_str),
+ 					Int32GetDatum(startpos));
+ 
+ 			/* so we know we're done next time through */
+ 			splitctx->offset = splitctx->wide_len;
+ 
+ 			return result;
+ 		}
+ 
+ 		/* not reached: the continue above is the only way this loop loops */
+ 	}
+ }
+ 
+ Datum regexp_split_noopts(PG_FUNCTION_ARGS)
+ {								/* 2-arg entry point; regexp_split sees the absent flags as NULL */
+ 	return regexp_split(fcinfo);
+ }
+ 
+ /* regexp_split_array_limit - split into a text[] of at most "limit" items */
+ Datum regexp_split_array_limit(PG_FUNCTION_ARGS)
+ {
+ 	ArrayBuildState *astate = NULL;
+ 	regexp_split_ctx *splitctx = setup_regexp_split(PG_GETARG_TEXT_P(0),
+ 			PG_GETARG_TEXT_P(1), PG_GETARG_TEXT_P_IF_EXISTS(2));
+ 	/* get text type oid, too lazy to do it some other way */
+ 	Oid param_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ 	int nitems = 0;
+ 	int limit = (PG_NARGS() > 3) ? PG_GETARG_INT32(3) : -1;	/* < 0: no limit */
+ 
+ 	while (splitctx->offset < splitctx->wide_len &&
+ 			(limit < 0 || nitems < limit))
+ 	{
+ 		/* count here, so the run-away guard also works when there is no limit */
+ 		if (++nitems > splitctx->wide_len)
+ 			elog(ERROR, "split function terminated after iterating %d times", nitems);
+ 		astate = accumArrayResult(astate,
+ 								  get_next_split(splitctx),
+ 								  false,
+ 								  param_type,
+ 								  CurrentMemoryContext);
+ 	}
+ 
+ 	if (astate == NULL)			/* nothing split off: empty string or limit 0 */
+ 		PG_RETURN_NULL();		/* don't hand makeArrayResult a NULL state */
+ 	PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
+ }
+ 
+ Datum regexp_split_array(PG_FUNCTION_ARGS)
+ {								/* 3-arg entry point; the shared code checks PG_NARGS() for the limit */
+ 	return regexp_split_array_limit(fcinfo);
+ }
+ Datum regexp_split_array_noopts(PG_FUNCTION_ARGS)
+ {								/* 2-arg entry point; absent flags and limit are handled by the shared code */
+ 	return regexp_split_array_limit(fcinfo);
+ }
+ 
  /*
   * report whether regex_flavor is currently BASIC
   */
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.445
diff -c -r1.445 pg_proc.h
*** src/include/catalog/pg_proc.h	17 Feb 2007 00:55:57 -0000	1.445
--- src/include/catalog/pg_proc.h	18 Feb 2007 05:59:38 -0000
***************
*** 2259,2266 ****
--- 2259,2280 ----
  DESCR("replace text using regexp");
  DATA(insert OID =  2285 ( regexp_replace	   PGNSP PGUID 12 1 0 f f t f i 4 25 "25 25 25 25" _null_ _null_ _null_ textregexreplace - _null_ ));
  DESCR("replace text using regexp");
+ DATA(insert OID =  2760 ( regexp_matches   PGNSP PGUID 12 1 1 f f t t i 2 1009 "25 25" _null_ _null_ _null_	regexp_matches_noopts - _null_ ));
+ DESCR("return all match groups for regexp");
+ DATA(insert OID =  2761 ( regexp_matches   PGNSP PGUID 12 1 10 f f t t i 3 1009 "25 25 25" _null_ _null_ _null_	regexp_matches - _null_ ));
+ DESCR("return all match groups for regexp");
  DATA(insert OID =  2088 ( split_part   PGNSP PGUID 12 1 0 f f t f i 3 25 "25 25 23" _null_ _null_ _null_	split_text - _null_ ));
  DESCR("split string by field_sep and return field_num");
+ DATA(insert OID =  2762 ( regexp_split PGNSP PGUID 12 1 1000 f f t t i 2 25 "25 25" _null_ _null_ _null_	regexp_split_noopts - _null_ ));
+ DESCR("split string by pattern");
+ DATA(insert OID =  2763 ( regexp_split PGNSP PGUID 12 1 1000 f f t t i 3 25 "25 25 25" _null_ _null_ _null_	regexp_split - _null_ ));
+ DESCR("split string by pattern");
+ DATA(insert OID =  2764 ( regexp_split_array PGNSP PGUID 12 1 0 f f t f i 2 1009 "25 25" _null_ _null_ _null_	regexp_split_array_noopts - _null_ ));
+ DESCR("split string by pattern");
+ DATA(insert OID =  2765 ( regexp_split_array PGNSP PGUID 12 1 0 f f t f i 3 1009 "25 25 25" _null_ _null_ _null_	regexp_split_array - _null_ ));
+ DESCR("split string by pattern");
+ DATA(insert OID =  2766 ( regexp_split_array PGNSP PGUID 12 1 0 f f t f i 4 1009 "25 25 25 23" _null_ _null_ _null_	regexp_split_array_limit - _null_ ));
+ DESCR("split string by pattern");
  DATA(insert OID =  2089 ( to_hex	   PGNSP PGUID 12 1 0 f f t f i 1 25 "23" _null_ _null_ _null_	to_hex32 - _null_ ));
  DESCR("convert int4 number to hex");
  DATA(insert OID =  2090 ( to_hex	   PGNSP PGUID 12 1 0 f f t f i 1 25 "20" _null_ _null_ _null_	to_hex64 - _null_ ));
Index: src/include/utils/builtins.h
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.288
diff -c -r1.288 builtins.h
*** src/include/utils/builtins.h	17 Feb 2007 00:55:58 -0000	1.288
--- src/include/utils/builtins.h	18 Feb 2007 00:19:29 -0000
***************
*** 478,483 ****
--- 478,490 ----
  extern Datum textregexreplace_noopt(PG_FUNCTION_ARGS);
  extern Datum textregexreplace(PG_FUNCTION_ARGS);
  extern Datum similar_escape(PG_FUNCTION_ARGS);
+ extern Datum regexp_matches(PG_FUNCTION_ARGS);
+ extern Datum regexp_matches_noopts(PG_FUNCTION_ARGS);
+ extern Datum regexp_split(PG_FUNCTION_ARGS);
+ extern Datum regexp_split_noopts(PG_FUNCTION_ARGS);
+ extern Datum regexp_split_array(PG_FUNCTION_ARGS);
+ extern Datum regexp_split_array_noopts(PG_FUNCTION_ARGS);
+ extern Datum regexp_split_array_limit(PG_FUNCTION_ARGS);
  extern bool regex_flavor_is_basic(void);
  
  /* regproc.c */
Index: src/test/regress/expected/strings.out
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/src/test/regress/expected/strings.out,v
retrieving revision 1.29
diff -c -r1.29 strings.out
*** src/test/regress/expected/strings.out	11 May 2006 19:15:36 -0000	1.29
--- src/test/regress/expected/strings.out	18 Feb 2007 00:23:03 -0000
***************
*** 217,225 ****
   Z Z
  (1 row)
  
! -- invalid option of REGEXP_REPLACE
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
! ERROR:  invalid option of regexp_replace: z
  -- E021-11 position expression
  SELECT POSITION('4' IN '1234567890') = '4' AS "4";
   4 
--- 217,455 ----
   Z Z
  (1 row)
  
! -- invalid regexp option
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
! ERROR:  invalid regexp option: z
! -- set so we can tell NULL from empty string
! \pset null '\\N'
! -- return all matches from regexp
! SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
!  regexp_matches 
! ----------------
!  {bar,beque}
! (1 row)
! 
! -- test case insensitive
! SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
!  regexp_matches 
! ----------------
!  {bAR,bEqUE}
! (1 row)
! 
! -- global option - more than one match
! SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
!  regexp_matches 
! ----------------
!  {bar,beque}
!  {bazil,barf}
! (2 rows)
! 
! -- empty capture group (matched empty string)
! SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
!  regexp_matches 
! ----------------
!  {bar,"",beque}
! (1 row)
! 
! -- no match
! SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$);
!  regexp_matches 
! ----------------
! (0 rows)
! 
! -- optional capture group did not match, null entry in array
! SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
!   regexp_matches  
! ------------------
!  {bar,NULL,beque}
! (1 row)
! 
! -- no capture groups
! SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
!  regexp_matches 
! ----------------
!  {barbeque}
! (1 row)
! 
! -- give me errors
! SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper');
! ERROR:  invalid regexp option: z
! SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);
! ERROR:  invalid regular expression: parentheses () not balanced
! SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$);
! ERROR:  invalid regular expression: invalid repetition count(s)
! -- split string on regexp
! SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo;
!   foo   | length 
! --------+--------
!  the    |      3
!  quick  |      5
!  brown  |      5
!  fox    |      3
!  jumped |      6
!  over   |      4
!  the    |      3
!  lazy   |      4
!  dog    |      3
! (9 rows)
! 
! SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$);
!                regexp_split_array               
! ------------------------------------------------
!  {the,quick,brown,fox,jumped,over,the,lazy,dog}
! (1 row)
! 
! SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo;
!  foo | length 
! -----+--------
!  t   |      1
!  h   |      1
!  e   |      1
!  q   |      1
!  u   |      1
!  i   |      1
!  c   |      1
!  k   |      1
!  b   |      1
!  r   |      1
!  o   |      1
!  w   |      1
!  n   |      1
!  f   |      1
!  o   |      1
!  x   |      1
!  j   |      1
!  u   |      1
!  m   |      1
!  p   |      1
!  e   |      1
!  d   |      1
!  o   |      1
!  v   |      1
!  e   |      1
!  r   |      1
!  t   |      1
!  h   |      1
!  e   |      1
!  l   |      1
!  a   |      1
!  z   |      1
!  y   |      1
!  d   |      1
!  o   |      1
!  g   |      1
! (36 rows)
! 
! SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$);
!                             regexp_split_array                             
! ---------------------------------------------------------------------------
!  {t,h,e,q,u,i,c,k,b,r,o,w,n,f,o,x,j,u,m,p,e,d,o,v,e,r,t,h,e,l,a,z,y,d,o,g}
! (1 row)
! 
! SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', '') AS foo;
!  foo | length 
! -----+--------
!  t   |      1
!  h   |      1
!  e   |      1
!      |      1
!  q   |      1
!  u   |      1
!  i   |      1
!  c   |      1
!  k   |      1
!      |      1
!  b   |      1
!  r   |      1
!  o   |      1
!  w   |      1
!  n   |      1
!      |      1
!  f   |      1
!  o   |      1
!  x   |      1
!      |      1
!  j   |      1
!  u   |      1
!  m   |      1
!  p   |      1
!  e   |      1
!  d   |      1
!      |      1
!  o   |      1
!  v   |      1
!  e   |      1
!  r   |      1
!      |      1
!  t   |      1
!  h   |      1
!  e   |      1
!      |      1
!  l   |      1
!  a   |      1
!  z   |      1
!  y   |      1
!      |      1
!  d   |      1
!  o   |      1
!  g   |      1
! (44 rows)
! 
! SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', '');
!                                             regexp_split_array                                             
! -----------------------------------------------------------------------------------------------------------
!  {t,h,e," ",q,u,i,c,k," ",b,r,o,w,n," ",f,o,x," ",j,u,m,p,e,d," ",o,v,e,r," ",t,h,e," ",l,a,z,y," ",d,o,g}
! (1 row)
! 
! -- case insensitive
! SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo;
!           foo          | length 
! -----------------------+--------
!  th                    |      2
!   QUick bROWn FOx jUMP |     21
!  d ov                  |      4
!  r TH                  |      4
!   lazy dOG             |      9
! (5 rows)
! 
! SELECT regexp_split_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i');
!                    regexp_split_array                   
! --------------------------------------------------------
!  {th," QUick bROWn FOx jUMP","d ov","r TH"," lazy dOG"}
! (1 row)
! 
! -- no match of pattern
! SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', 'nomatch') AS foo;
!                      foo                      | length 
! ----------------------------------------------+--------
!  the quick brown fox jumped over the lazy dog |     44
! (1 row)
! 
! SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', 'nomatch');
!                 regexp_split_array                
! --------------------------------------------------
!  {"the quick brown fox jumped over the lazy dog"}
! (1 row)
! 
! -- array version limit
! SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$, '', 8);
!  regexp_split_array 
! --------------------
!  {t,h,e,q,u,i,c,k}
! (1 row)
! 
! -- errors
! SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo;
! ERROR:  invalid regexp option: z
! SELECT regexp_split_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy');
! ERROR:  invalid regexp option: z
! -- global option meaningless for regexp_split
! SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo;
! ERROR:  regexp_split does not support the global option
! SELECT regexp_split_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g');
! ERROR:  regexp_split does not support the global option
! -- change NULL-display back
! \pset null ''
  -- E021-11 position expression
  SELECT POSITION('4' IN '1234567890') = '4' AS "4";
   4 
Index: src/test/regress/sql/strings.sql
===================================================================
RCS file: /home/jeremyd/local/postgres/cvsuproot/pgsql/src/test/regress/sql/strings.sql,v
retrieving revision 1.18
diff -c -r1.18 strings.sql
*** src/test/regress/sql/strings.sql	6 Mar 2006 19:49:20 -0000	1.18
--- src/test/regress/sql/strings.sql	18 Feb 2007 00:20:59 -0000
***************
*** 85,93 ****
  SELECT regexp_replace('AAA   BBB   CCC   ', E'\\s+', ' ', 'g');
  SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
! -- invalid option of REGEXP_REPLACE
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
  
  -- E021-11 position expression
  SELECT POSITION('4' IN '1234567890') = '4' AS "4";
  
--- 85,146 ----
  SELECT regexp_replace('AAA   BBB   CCC   ', E'\\s+', ' ', 'g');
  SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
! -- invalid regexp option
  SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
  
+ -- set so we can tell NULL from empty string
+ \pset null '\\N'
+ 
+ -- return all matches from regexp
+ SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+ 
+ -- test case insensitive
+ SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
+ 
+ -- global option - more than one match
+ SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
+ 
+ -- empty capture group (matched empty string)
+ SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
+ -- no match
+ SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$);
+ -- optional capture group did not match, null entry in array
+ SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
+ 
+ -- no capture groups
+ SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
+ 
+ -- give me errors
+ SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper');
+ SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);
+ SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$);
+ 
+ -- split string on regexp
+ SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo;
+ SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$);
+ 
+ SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo;
+ SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$);
+ SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', '') AS foo;
+ SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', '');
+ -- case insensitive
+ SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo;
+ SELECT regexp_split_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i');
+ -- no match of pattern
+ SELECT foo, length(foo) FROM regexp_split('the quick brown fox jumped over the lazy dog', 'nomatch') AS foo;
+ SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', 'nomatch');
+ -- array version limit
+ SELECT regexp_split_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$, '', 8);
+ -- errors
+ SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo;
+ SELECT regexp_split_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy');
+ -- global option meaningless for regexp_split
+ SELECT foo, length(foo) FROM regexp_split('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo;
+ SELECT regexp_split_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g');
+ 
+ -- change NULL-display back
+ \pset null ''
+ 
  -- E021-11 position expression
  SELECT POSITION('4' IN '1234567890') = '4' AS "4";