array_accum aggregate

From: Stephen Frost <sfrost(at)snowman(dot)net>
To: pgsql-patches(at)postgresql(dot)org
Subject: array_accum aggregate
Date: 2006-10-11 04:51:16
Message-ID: 20061011045116.GW24675@kenobi.snowman.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers pgsql-patches

Greetings,

Please find below a patch to add the array_accum aggregate as a
built-in using two new C functions defined in array_userfuncs.c.
These functions simply expose the pre-existing efficient array
building routines used elsewhere in the backend (accumArrayResult
and makeArrayResult, specifically). An array_accum aggregate has
existed in the documentation for quite some time using the
inefficient (for larger arrays) array_append routine. The
documentation around the example has also been updated to reflect
the addition of this built-in.

Documentation and a regression test are also included.

Thanks,

Stephen

Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.343
diff -c -r1.343 func.sgml
*** doc/src/sgml/func.sgml 1 Oct 2006 18:54:31 -0000 1.343
--- doc/src/sgml/func.sgml 11 Oct 2006 04:38:45 -0000
***************
*** 7851,7856 ****
--- 7851,7872 ----
<row>
<entry>
<indexterm>
+ <primary>array_accum</primary>
+ </indexterm>
+ <function>array_accum(<replaceable class="parameter">anyelement</replaceable>)</function>
+ </entry>
+ <entry>
+ <type>anyelement</type>
+ </entry>
+ <entry>
+ array of elements of same type as argument type
+ </entry>
+ <entry>an array of all input elements (NULLs, non-nulls, and duplicates)</entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
<primary>average</primary>
</indexterm>
<function>avg(<replaceable class="parameter">expression</replaceable>)</function>
Index: doc/src/sgml/xaggr.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/xaggr.sgml,v
retrieving revision 1.33
diff -c -r1.33 xaggr.sgml
*** doc/src/sgml/xaggr.sgml 16 Sep 2006 00:30:16 -0000 1.33
--- doc/src/sgml/xaggr.sgml 11 Oct 2006 04:38:45 -0000
***************
*** 132,138 ****
</programlisting>

Here, the actual state type for any aggregate call is the array type
! having the actual input type as elements.
</para>

<para>
--- 132,141 ----
</programlisting>

Here, the actual state type for any aggregate call is the array type
! having the actual input type as elements. Note: array_accum() is now
! a built-in aggregate which uses a much more efficient mechanism than
! that which is provided by array_append, prior users of array_accum()
! may be pleasantly suprised at the marked improvment for larger arrays.
</para>

<para>
Index: src/backend/utils/adt/array_userfuncs.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/array_userfuncs.c,v
retrieving revision 1.20
diff -c -r1.20 array_userfuncs.c
*** src/backend/utils/adt/array_userfuncs.c 14 Jul 2006 14:52:23 -0000 1.20
--- src/backend/utils/adt/array_userfuncs.c 11 Oct 2006 04:38:46 -0000
***************
*** 15,20 ****
--- 15,22 ----
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
+ #include "utils/memutils.h"
+ #include "nodes/execnodes.h"


/*-----------------------------------------------------------------------------
***************
*** 399,404 ****
--- 401,516 ----
PG_RETURN_ARRAYTYPE_P(result);
}

+ /* Structure, used by aaccum_sfunc and aaccum_ffunc to
+ * implement the array_accum() aggregate, for storing
+ * pointers to the ArrayBuildState for the array we are
+ * building and the MemoryContext in which it is being
+ * built. Note that this structure is
+ * considered an 'anyarray' externally, which is a
+ * variable-length datatype, and therefore
+ * must open with an int32 defining the length. */
+ typedef struct {
+ int32 vl_len;
+ ArrayBuildState *astate;
+ MemoryContext arrctx;
+ } aaccum_info;
+
+ /*-----------------------------------------------------------------------------
+ * aaccum_sfunc :
+ * State transistion function for the array_accum() aggregate,
+ * efficiently builds an in-memory array by working in blocks and
+ * minimizing realloc()'s and copying of the data in general.
+ * Creates a seperate memory context attached to the AggContext into
+ * which the array is built. That context is free'd when the final
+ * function is called (aaccum_ffunc). accumArrayResult() does all
+ * the heavy lifting here, this is really just a glue function.
+ *----------------------------------------------------------------------------
+ */
+ Datum
+ aaccum_sfunc(PG_FUNCTION_ARGS)
+ {
+ aaccum_info *ainfo;
+ AggState *aggstate;
+
+ /* Make sure we are being called in an aggregate. */
+ if (!fcinfo->context || !IsA(fcinfo->context, AggState))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("Can not call aaccum_sfunc as a non-aggregate"),
+ errhint("Use the array_accum aggregate")));
+
+ aggstate = (AggState*) fcinfo->context;
+
+ /* Initial call passes NULL in for our state variable.
+ * Allocate memory to store the pointers in and create
+ * our context. */
+ if (PG_ARGISNULL(0)) {
+ /* Allocate memory to hold the pointers to the ArrayBuildState
+ * and the MemoryContext where we are building the array. Note
+ * that we can do this in the CurrentMemoryContext because when
+ * we return the storage "bytea" will be copied into the AggState
+ * context by the caller and passed back to us on the next call. */
+ ainfo = (aaccum_info*) palloc(sizeof(aaccum_info));
+ ainfo->vl_len = sizeof(aaccum_info);
+ ainfo->astate = NULL;
+
+ /* New context created which will store our array accumulation.
+ * The parent is the AggContext for this query since it needs to
+ * persist for the same timeframe as the state value.
+ * The state value holds the pointers to the ArrayBuildState and this
+ * MemoryContext through the aaccum_info structure. */
+ ainfo->arrctx = AllocSetContextCreate(aggstate->aggcontext, "ArrayAccumCtx",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ } else {
+ /* Our state variable is non-null, therefore it must be an existing
+ * ainfo structure. */
+ ainfo = (aaccum_info*) PG_GETARG_BYTEA_P(0);
+ }
+
+ /* Pull the element to be added and pass it along with the ArrayBuildState
+ * and ArrayAccumCtx MemoryContext to accumArrayResult, checking if it is
+ * NULL or not. */
+ ainfo->astate = accumArrayResult(ainfo->astate,
+ PG_ARGISNULL(1) ? (Datum) 0 : PG_GETARG_DATUM(1),
+ PG_ARGISNULL(1),
+ get_fn_expr_argtype(fcinfo->flinfo, 1),
+ ainfo->arrctx);
+
+ /* Caller will copy storage into the AggContext after the first call and then
+ * should not touch it as we will always return the same pointer passed in. */
+ PG_RETURN_BYTEA_P(ainfo);
+ }
+
+ /*-----------------------------------------------------------------------------
+ * aaccum_ffunc :
+ * Final function for the array_accum() aggregate, creates the final
+ * finished array and passes it back to the user. Also deletes the
+ * memory context created by the aaccum_sfunc(). makeArrayResult()
+ * does all the heavy lifting here, this is really just a glue function.
+ *----------------------------------------------------------------------------
+ */
+ Datum
+ aaccum_ffunc(PG_FUNCTION_ARGS)
+ {
+ aaccum_info *ainfo;
+
+ /* Check if we are passed in a NULL */
+ if (PG_ARGISNULL(0)) PG_RETURN_ARRAYTYPE_P(NULL);
+
+ /* Make sure we are being called in an aggregate. */
+ if (!fcinfo->context || !IsA(fcinfo->context, AggState))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("Can not call aaccum_sfunc as a non-aggregate"),
+ errhint("Use the array_accum aggregate")));
+
+ ainfo = (aaccum_info*) PG_GETARG_BYTEA_P(0);
+
+ /* makeArrayResult will delete ainfo->arrctx for us. */
+ PG_RETURN_ARRAYTYPE_P(makeArrayResult(ainfo->astate, ainfo->arrctx));
+ }

/*
* used by text_to_array() in varlena.c
Index: src/include/catalog/pg_aggregate.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_aggregate.h,v
retrieving revision 1.58
diff -c -r1.58 pg_aggregate.h
*** src/include/catalog/pg_aggregate.h 4 Oct 2006 00:30:07 -0000 1.58
--- src/include/catalog/pg_aggregate.h 11 Oct 2006 04:38:46 -0000
***************
*** 221,226 ****
--- 221,229 ----
DATA(insert ( 2242 bitand - 0 1560 _null_ ));
DATA(insert ( 2243 bitor - 0 1560 _null_ ));

+ /* array accumulation */
+ DATA(insert ( 322 aaccum_sfunc aaccum_ffunc 0 2277 _null_ ));
+
/*
* prototypes for functions in pg_aggregate.c
*/
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.427
diff -c -r1.427 pg_proc.h
*** src/include/catalog/pg_proc.h 4 Oct 2006 00:30:07 -0000 1.427
--- src/include/catalog/pg_proc.h 11 Oct 2006 04:38:47 -0000
***************
*** 1017,1022 ****
--- 1017,1026 ----
DESCR("larger of two");
DATA(insert OID = 516 ( array_smaller PGNSP PGUID 12 f f t f i 2 2277 "2277 2277" _null_ _null_ _null_ array_smaller - _null_ ));
DESCR("smaller of two");
+ DATA(insert OID = 320 ( aaccum_sfunc PGNSP PGUID 12 f f f f i 2 2277 "2277 2283" _null_ _null_ _null_ aaccum_sfunc - _null_ ));
+ DESCR("array_accum aggregate state function");
+ DATA(insert OID = 321 ( aaccum_ffunc PGNSP PGUID 12 f f f f i 1 2277 "2277" _null_ _null_ _null_ aaccum_ffunc - _null_ ));
+ DESCR("array_accum aggregate final function");

DATA(insert OID = 760 ( smgrin PGNSP PGUID 12 f f t f s 1 210 "2275" _null_ _null_ _null_ smgrin - _null_ ));
DESCR("I/O");
***************
*** 3252,3257 ****
--- 3256,3263 ----
DATA(insert OID = 2828 ( covar_samp PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));
DATA(insert OID = 2829 ( corr PGNSP PGUID 12 t f f f i 2 701 "701 701" _null_ _null_ _null_ aggregate_dummy - _null_ ));

+ DATA(insert OID = 322 ( array_accum PGNSP PGUID 12 t f f f i 1 2277 "2283" _null_ _null_ _null_ aggregate_dummy - _null_ ));
+
DATA(insert OID = 2160 ( text_pattern_lt PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_lt - _null_ ));
DATA(insert OID = 2161 ( text_pattern_le PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_le - _null_ ));
DATA(insert OID = 2162 ( text_pattern_eq PGNSP PGUID 12 f f t f i 2 16 "25 25" _null_ _null_ _null_ text_pattern_eq - _null_ ));
Index: src/include/utils/array.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/array.h,v
retrieving revision 1.59
diff -c -r1.59 array.h
*** src/include/utils/array.h 10 Sep 2006 20:14:20 -0000 1.59
--- src/include/utils/array.h 11 Oct 2006 04:38:47 -0000
***************
*** 266,271 ****
--- 266,274 ----
*/
extern Datum array_push(PG_FUNCTION_ARGS);
extern Datum array_cat(PG_FUNCTION_ARGS);
+ extern Datum aaccum_sfunc(PG_FUNCTION_ARGS);
+ extern Datum aaccum_ffunc(PG_FUNCTION_ARGS);
+

extern ArrayType *create_singleton_array(FunctionCallInfo fcinfo,
Oid element_type,
Index: src/test/regress/expected/aggregates.out
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/expected/aggregates.out,v
retrieving revision 1.15
diff -c -r1.15 aggregates.out
*** src/test/regress/expected/aggregates.out 28 Jul 2006 18:33:04 -0000 1.15
--- src/test/regress/expected/aggregates.out 11 Oct 2006 04:38:47 -0000
***************
*** 236,241 ****
--- 236,248 ----
9 | 100 | 4
(10 rows)

+ -- array accumulation aggregate
+ SELECT array_accum(generate_series) from generate_series(0,5);
+ array_accum
+ ---------------
+ {0,1,2,3,4,5}
+ (1 row)
+
-- user-defined aggregates
SELECT newavg(four) AS avg_1 FROM onek;
avg_1
Index: src/test/regress/sql/aggregates.sql
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/sql/aggregates.sql,v
retrieving revision 1.13
diff -c -r1.13 aggregates.sql
*** src/test/regress/sql/aggregates.sql 28 Jul 2006 18:33:04 -0000 1.13
--- src/test/regress/sql/aggregates.sql 11 Oct 2006 04:38:48 -0000
***************
*** 59,64 ****
--- 59,67 ----
select ten, count(four), sum(DISTINCT four) from onek
group by ten order by ten;

+ -- array accumulation aggregate
+ SELECT array_accum(generate_series) from generate_series(0,5);
+
-- user-defined aggregates
SELECT newavg(four) AS avg_1 FROM onek;
SELECT newsum(four) AS sum_1500 FROM onek;

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Richard Broersma Jr 2006-10-11 05:31:47 Re: left outer join on multi tables
Previous Message David Fetter 2006-10-11 04:16:42 Re: more anti-postgresql FUD

Browse pgsql-patches by date

  From Date Subject
Next Message Hiroshi Saito 2006-10-12 00:36:32 warning: "min" redefined of qsort.
Previous Message Michael Glaesemann 2006-10-11 03:10:00 Constraint trigger doc patch