Skip site navigation (1) Skip section navigation (2)

array_accum aggregate

From: Stephen Frost <sfrost(at)snowman(dot)net>
To: pgsql-patches(at)postgresql(dot)org
Subject: array_accum aggregate
Date: 2006-10-11 04:51:16
Message-ID: 20061011045116.GW24675@kenobi.snowman.net (view raw or flat)
Thread:
Lists: pgsql-hackerspgsql-patches
Greetings,

  Please find below a patch to add the array_accum aggregate as a
  built-in using two new C functions defined in array_userfuncs.c.
  These functions simply expose the pre-existing efficient array
  building routines used elsewhere in the backend (accumArrayResult
  and makeArrayResult, specifically).   An array_accum aggregate has
  existed in the documentation for quite some time using the
  inefficient (for larger arrays) array_append routine.  The
  documentation around the example has also been updated to reflect
  the addition of this built-in.

  Documentation and a regression test are also included.

  	Thanks,

		Stephen

Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.343
diff -c -r1.343 func.sgml
*** doc/src/sgml/func.sgml	1 Oct 2006 18:54:31 -0000	1.343
--- doc/src/sgml/func.sgml	11 Oct 2006 04:38:45 -0000
***************
*** 7851,7856 ****
--- 7851,7872 ----
       <row>
        <entry>
         <indexterm>
+         <primary>array_accum</primary>
+        </indexterm>
+        <function>array_accum(<replaceable class="parameter">anyelement</replaceable>)</function>
+       </entry>
+       <entry>
+        <type>anyelement</type>
+       </entry>
+       <entry>
+         array of elements of same type as argument type
+       </entry>
+       <entry>an array of all input elements (NULLs, non-nulls, and duplicates)</entry>
+      </row>
+ 
+      <row>
+       <entry>
+        <indexterm>
          <primary>average</primary>
         </indexterm>
         <function>avg(<replaceable class="parameter">expression</replaceable>)</function>
Index: doc/src/sgml/xaggr.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/xaggr.sgml,v
retrieving revision 1.33
diff -c -r1.33 xaggr.sgml
*** doc/src/sgml/xaggr.sgml	16 Sep 2006 00:30:16 -0000	1.33
--- doc/src/sgml/xaggr.sgml	11 Oct 2006 04:38:45 -0000
***************
*** 132,138 ****
  </programlisting>
  
     Here, the actual state type for any aggregate call is the array type
!    having the actual input type as elements.
    </para>
  
    <para>
--- 132,141 ----
  </programlisting>
  
     Here, the actual state type for any aggregate call is the array type
!    having the actual input type as elements.  Note: array_accum() is now
!    a built-in aggregate which uses a much more efficient mechanism than
!    that which is provided by array_append, prior users of array_accum()
!    may be pleasantly suprised at the marked improvment for larger arrays.
    </para>
  
    <para>
Index: src/backend/utils/adt/array_userfuncs.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/array_userfuncs.c,v
retrieving revision 1.20
diff -c -r1.20 array_userfuncs.c
*** src/backend/utils/adt/array_userfuncs.c	14 Jul 2006 14:52:23 -0000	1.20
--- src/backend/utils/adt/array_userfuncs.c	11 Oct 2006 04:38:46 -0000
***************
*** 15,20 ****
--- 15,22 ----
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
+ #include "utils/memutils.h"
+ #include "nodes/execnodes.h"
  
  
  /*-----------------------------------------------------------------------------
***************
*** 399,404 ****
--- 401,516 ----
  	PG_RETURN_ARRAYTYPE_P(result);
  }
  
+ /* Structure, used by aaccum_sfunc and aaccum_ffunc to
+  * implement the array_accum() aggregate, for storing 
+  * pointers to the ArrayBuildState for the array we are 
+  * building and the MemoryContext in which it is being
+  * built.  Note that this structure is 
+  * considered an 'anyarray' externally, which is a
+  * variable-length datatype, and therefore
+  * must open with an int32 defining the length. */
+ typedef struct {
+ 	int32				 vl_len;
+ 	ArrayBuildState		*astate;
+ 	MemoryContext		 arrctx;
+ } aaccum_info;
+ 
+ /*-----------------------------------------------------------------------------
+  * aaccum_sfunc :
+  *      State transistion function for the array_accum() aggregate,
+  *      efficiently builds an in-memory array by working in blocks and
+  *      minimizing realloc()'s and copying of the data in general.
+  *      Creates a seperate memory context attached to the AggContext into
+  *      which the array is built.  That context is free'd when the final
+  *      function is called (aaccum_ffunc).  accumArrayResult() does all
+  *      the heavy lifting here, this is really just a glue function.
+  *----------------------------------------------------------------------------
+  */
+ Datum
+ aaccum_sfunc(PG_FUNCTION_ARGS)
+ {
+ 	aaccum_info		*ainfo;
+ 	AggState		*aggstate;
+ 
+ 	/* Make sure we are being called in an aggregate. */
+ 	if (!fcinfo->context || !IsA(fcinfo->context, AggState))
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 				 errmsg("Can not call aaccum_sfunc as a non-aggregate"),
+ 				 errhint("Use the array_accum aggregate")));
+ 
+ 	aggstate = (AggState*) fcinfo->context;
+ 
+ 	/* Initial call passes NULL in for our state variable. 
+ 	 * Allocate memory to store the pointers in and create
+ 	 * our context. */
+ 	if (PG_ARGISNULL(0)) {
+ 		/* Allocate memory to hold the pointers to the ArrayBuildState
+ 		 * and the MemoryContext where we are building the array.  Note
+ 		 * that we can do this in the CurrentMemoryContext because when
+ 		 * we return the storage "bytea" will be copied into the AggState
+ 		 * context by the caller and passed back to us on the next call. */
+ 		ainfo = (aaccum_info*) palloc(sizeof(aaccum_info));
+ 		ainfo->vl_len = sizeof(aaccum_info);
+ 		ainfo->astate = NULL;
+ 
+ 		/* New context created which will store our array accumulation.
+ 		 * The parent is the AggContext for this query since it needs to
+ 		 * persist for the same timeframe as the state value. 
+ 		 * The state value holds the pointers to the ArrayBuildState and this 
+ 		 * MemoryContext through the aaccum_info structure. */
+ 		ainfo->arrctx = AllocSetContextCreate(aggstate->aggcontext, "ArrayAccumCtx",
+ 											  ALLOCSET_DEFAULT_MINSIZE,
+ 											  ALLOCSET_DEFAULT_INITSIZE,
+ 											  ALLOCSET_DEFAULT_MAXSIZE);
+ 	} else {
+ 		/* Our state variable is non-null, therefore it must be an existing
+ 		 * ainfo structure. */
+ 		ainfo = (aaccum_info*) PG_GETARG_BYTEA_P(0);
+ 	}
+ 
+ 	/* Pull the element to be added and pass it along with the ArrayBuildState
+ 	 * and ArrayAccumCtx MemoryContext to accumArrayResult, checking if it is
+ 	 * NULL or not. */
+ 	ainfo->astate = accumArrayResult(ainfo->astate, 
+ 									 PG_ARGISNULL(1) ? (Datum) 0 : PG_GETARG_DATUM(1),
+ 									 PG_ARGISNULL(1),
+ 									 get_fn_expr_argtype(fcinfo->flinfo, 1),
+ 									 ainfo->arrctx);
+ 
+ 	/* Caller will copy storage into the AggContext after the first call and then
+ 	 * should not touch it as we will always return the same pointer passed in. */
+ 	PG_RETURN_BYTEA_P(ainfo);
+ }
+ 
+ /*-----------------------------------------------------------------------------
+  * aaccum_ffunc :
+  *      Final function for the array_accum() aggregate, creates the final
+  *      finished array and passes it back to the user.  Also deletes the
+  *      memory context created by the aaccum_sfunc().  makeArrayResult()
+  *      does all the heavy lifting here, this is really just a glue function.
+  *----------------------------------------------------------------------------
+  */
+ Datum
+ aaccum_ffunc(PG_FUNCTION_ARGS)
+ {
+ 	aaccum_info		*ainfo;
+ 
+ 	/* Check if we are passed in a NULL */
+ 	if (PG_ARGISNULL(0)) PG_RETURN_ARRAYTYPE_P(NULL);
+ 
+ 	/* Make sure we are being called in an aggregate. */
+ 	if (!fcinfo->context || !IsA(fcinfo->context, AggState))
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 				 errmsg("Can not call aaccum_sfunc as a non-aggregate"),
+ 				 errhint("Use the array_accum aggregate")));
+ 
+ 	ainfo = (aaccum_info*) PG_GETARG_BYTEA_P(0);
+ 
+ 	/* makeArrayResult will delete ainfo->arrctx for us. */
+ 	PG_RETURN_ARRAYTYPE_P(makeArrayResult(ainfo->astate, ainfo->arrctx));
+ }
  
  /*
   * used by text_to_array() in varlena.c
Index: src/include/catalog/pg_aggregate.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_aggregate.h,v
retrieving revision 1.58
diff -c -r1.58 pg_aggregate.h
*** src/include/catalog/pg_aggregate.h	4 Oct 2006 00:30:07 -0000	1.58
--- src/include/catalog/pg_aggregate.h	11 Oct 2006 04:38:46 -0000
***************
*** 221,226 ****
--- 221,229 ----
  DATA(insert ( 2242 bitand		  -					0	1560	_null_ ));
  DATA(insert ( 2243 bitor		  -					0	1560	_null_ ));
  
+ /* array accumulation */
+ DATA(insert ( 322	aaccum_sfunc		  aaccum_ffunc			0	2277	_null_ ));
+ 
  /*
   * prototypes for functions in pg_aggregate.c
   */
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.427
diff -c -r1.427 pg_proc.h
*** src/include/catalog/pg_proc.h	4 Oct 2006 00:30:07 -0000	1.427
--- src/include/catalog/pg_proc.h	11 Oct 2006 04:38:47 -0000
***************
*** 1017,1022 ****
--- 1017,1026 ----
  DESCR("larger of two");
  DATA(insert OID = 516 (  array_smaller	   PGNSP PGUID 12 f f t f i 2 2277 "2277 2277" _null_ _null_ _null_ array_smaller - _null_ ));
  DESCR("smaller of two");
+ DATA(insert OID = 320 (  aaccum_sfunc	   PGNSP PGUID 12 f f f f i 2 2277 "2277 2283" _null_ _null_ _null_ aaccum_sfunc - _null_ ));
+ DESCR("array_accum aggregate state function");
+ DATA(insert OID = 321 (  aaccum_ffunc	   PGNSP PGUID 12 f f f f i 1 2277 "2277" _null_ _null_ _null_ aaccum_ffunc - _null_ ));
+ DESCR("array_accum aggregate final function");
  
  DATA(insert OID = 760 (  smgrin			   PGNSP PGUID 12 f f t f s 1 210 "2275" _null_ _null_ _null_  smgrin - _null_ ));
  DESCR("I/O");
***************
*** 3252,3257 ****
--- 3256,3263 ----
  DATA(insert OID = 2828 (  covar_samp		PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
  DATA(insert OID = 2829 (  corr				PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_  aggregate_dummy - _null_ ));
  
+ DATA(insert OID = 322 (  array_accum		PGNSP PGUID 12 t f f f i 1 2277 "2283" _null_ _null_ _null_  aggregate_dummy - _null_ ));
+ 
  DATA(insert OID = 2160 ( text_pattern_lt	 PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_lt - _null_ ));
  DATA(insert OID = 2161 ( text_pattern_le	 PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_le - _null_ ));
  DATA(insert OID = 2162 ( text_pattern_eq	 PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_eq - _null_ ));
Index: src/include/utils/array.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/array.h,v
retrieving revision 1.59
diff -c -r1.59 array.h
*** src/include/utils/array.h	10 Sep 2006 20:14:20 -0000	1.59
--- src/include/utils/array.h	11 Oct 2006 04:38:47 -0000
***************
*** 266,271 ****
--- 266,274 ----
   */
  extern Datum array_push(PG_FUNCTION_ARGS);
  extern Datum array_cat(PG_FUNCTION_ARGS);
+ extern Datum aaccum_sfunc(PG_FUNCTION_ARGS);
+ extern Datum aaccum_ffunc(PG_FUNCTION_ARGS);
+ 
  
  extern ArrayType *create_singleton_array(FunctionCallInfo fcinfo,
  					   Oid element_type,
Index: src/test/regress/expected/aggregates.out
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/expected/aggregates.out,v
retrieving revision 1.15
diff -c -r1.15 aggregates.out
*** src/test/regress/expected/aggregates.out	28 Jul 2006 18:33:04 -0000	1.15
--- src/test/regress/expected/aggregates.out	11 Oct 2006 04:38:47 -0000
***************
*** 236,241 ****
--- 236,248 ----
     9 |   100 |   4
  (10 rows)
  
+ -- array accumulation aggregate
+ SELECT array_accum(generate_series) from generate_series(0,5);
+   array_accum  
+ ---------------
+  {0,1,2,3,4,5}
+ (1 row)
+ 
  -- user-defined aggregates
  SELECT newavg(four) AS avg_1 FROM onek;
         avg_1        
Index: src/test/regress/sql/aggregates.sql
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/sql/aggregates.sql,v
retrieving revision 1.13
diff -c -r1.13 aggregates.sql
*** src/test/regress/sql/aggregates.sql	28 Jul 2006 18:33:04 -0000	1.13
--- src/test/regress/sql/aggregates.sql	11 Oct 2006 04:38:48 -0000
***************
*** 59,64 ****
--- 59,67 ----
  select ten, count(four), sum(DISTINCT four) from onek
  group by ten order by ten;
  
+ -- array accumulation aggregate
+ SELECT array_accum(generate_series) from generate_series(0,5);
+ 
  -- user-defined aggregates
  SELECT newavg(four) AS avg_1 FROM onek;
  SELECT newsum(four) AS sum_1500 FROM onek;

Responses

pgsql-hackers by date

Next:From: Richard Broersma JrDate: 2006-10-11 05:31:47
Subject: Re: left outer join on multi tables
Previous:From: David FetterDate: 2006-10-11 04:16:42
Subject: Re: more anti-postgresql FUD

pgsql-patches by date

Next:From: Hiroshi SaitoDate: 2006-10-12 00:36:32
Subject: warning: "min" redefined of qsort.
Previous:From: Michael GlaesemannDate: 2006-10-11 03:10:00
Subject: Constraint trigger doc patch

Privacy Policy | About PostgreSQL
Copyright © 1996-2014 The PostgreSQL Global Development Group