/*
 * strxfrm_test.c: Simple wrapper on stdlib strxfrm() for Postgres.
 *
 * Author: Peter Geoghegan
 *
 * On 18 March 2012 15:08, Tom Lane <tgl@sss.pgh.pa.us> wrote:
 *
 * However, it occurred to me that we could pretty easily jury-rig something
 * that would give us an idea about the actual benefit available here.  To wit:
 * make a C function that wraps strxfrm, basically strxfrm(text) returns bytea.
 * Then compare the performance of ORDER BY text_col to ORDER BY
 * strxfrm(text_col). (You would need to have either both or neither of text
 * and bytea using the sortsupport code paths for this to be a fair
 * comparison.)
 */

#include "postgres.h"
#include <string.h>
#include "fmgr.h"
#include "catalog/pg_collation.h"
#include "mb/pg_wchar.h"
#include "utils/pg_locale.h"
#include "utils/builtins.h"

/*
 * Emit the module magic block, but only when the server headers in use
 * define PG_MODULE_MAGIC, so that this file still compiles against headers
 * that lack the macro.
 */
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

/* Prototype for the SQL-callable wrapper defined later in this file. */
Datum strxfrm_test(PG_FUNCTION_ARGS);

/*
 * Declare strxfrm_test to the function manager as a version-1 calling
 * convention function (arguments fetched with PG_GETARG_*, result returned
 * with PG_RETURN_*).
 */
PG_FUNCTION_INFO_V1(strxfrm_test);

/*
 * BYTEA strxfrm_test(TEXT string)
 */
Datum
strxfrm_test(PG_FUNCTION_ARGS)
{
	/*
	 * Unfortunately, just like with strcoll we need a null-terminated cstring
	 */
	char	*in_string = text_to_cstring(PG_GETARG_TEXT_PP(0));
	Size	 tclen = strlen(in_string);
	Size	 blob_buf_size, act_size;
	bytea	*result;

	/*
	 * Buffer for blob is 5 times the size of original cstring (less NULL byte
	 * + header).  There's undoubtedly a better-principled way of sizing this,
	 * such as by doing a "dry-run".  Right now, it is at least robust enough
	 * that it isn't immediately apparent that it's broken with a relatively
	 * small amount of varied input.
	 */

	blob_buf_size = Max(tclen, 2) * 5 + VARHDRSZ;
	result = palloc(blob_buf_size);

	/*
	 * I assume that the above ill-principled buffer sizing isn't a problem for
	 * now, since our immediate concern is determining the performance
	 * characteristics of strxfrm()'ing dynamically, before beginning the sort
	 * rather than doing everything within strcoll(), as in the existing code.
	 *
	 * The blobs are sometimes quite a bit bigger than their originating
	 * string:
	 *
	 * postgres=# show lc_collate;
	 * lc_collate
	 * -------------
	 * en_US.UTF-8
	 * (1 row)
	 *
	 * postgres=# select strxfrm_test('f');
	 * strxfrm_test
	 * --------------
	 * \x1101090109
	 * (1 row)
	 *
	 * Not by a fixed proportion though:
	 *
	 * postgres=# select strxfrm_test('fffff');
	 *				strxfrm_test
	 * --------------------------------------
	 * \x1111111111010909090909010909090909
	 * (1 row)
	 */

	act_size = strxfrm(VARDATA(result), in_string,
					blob_buf_size - VARHDRSZ);

	/* Alter header to indicate actual buffer length */
	SET_VARSIZE(result, VARHDRSZ + act_size);

	/*
	 * "If the value returned (act_size) is n or more, the contents of the
	 * array pointed to by s1 (blob) are unspecified."
	 */
	Assert(act_size < blob_buf_size - VARHDRSZ);

	pfree(in_string);

	PG_RETURN_BYTEA_P(result);
}
