From 5ccc5d1a54d0f6c7c47381533c879a9432fb925f Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Mon, 25 Nov 2024 12:19:15 +0900
Subject: [PATCH v35 1/7] Add support for adding custom COPY TO format

This uses the handler approach like tablesample. The approach creates
an internal function that returns an internal struct. In this case,
a COPY TO handler returns a CopyToRoutine.

This also adds a test module for a custom COPY TO handler.
---
 src/backend/commands/copy.c                   | 79 ++++++++++++++++---
 src/backend/commands/copyto.c                 | 36 +++++++--
 src/backend/nodes/Makefile                    |  1 +
 src/backend/nodes/gen_node_support.pl         |  2 +
 src/backend/utils/adt/pseudotypes.c           |  1 +
 src/include/catalog/pg_proc.dat               |  6 ++
 src/include/catalog/pg_type.dat               |  6 ++
 src/include/commands/copy.h                   |  1 +
 src/include/commands/copyapi.h                |  2 +
 src/include/nodes/meson.build                 |  1 +
 src/test/modules/Makefile                     |  1 +
 src/test/modules/meson.build                  |  1 +
 src/test/modules/test_copy_format/.gitignore  |  4 +
 src/test/modules/test_copy_format/Makefile    | 23 ++++++
 .../expected/test_copy_format.out             | 17 ++++
 src/test/modules/test_copy_format/meson.build | 33 ++++++++
 .../test_copy_format/sql/test_copy_format.sql |  5 ++
 .../test_copy_format--1.0.sql                 |  8 ++
 .../test_copy_format/test_copy_format.c       | 63 +++++++++++++++
 .../test_copy_format/test_copy_format.control |  4 +
 20 files changed, 273 insertions(+), 21 deletions(-)
 mode change 100644 => 100755 src/backend/nodes/gen_node_support.pl
 create mode 100644 src/test/modules/test_copy_format/.gitignore
 create mode 100644 src/test/modules/test_copy_format/Makefile
 create mode 100644 src/test/modules/test_copy_format/expected/test_copy_format.out
 create mode 100644 src/test/modules/test_copy_format/meson.build
 create mode 100644 src/test/modules/test_copy_format/sql/test_copy_format.sql
 create mode 100644 src/test/modules/test_copy_format/test_copy_format--1.0.sql
 create mode 100644 src/test/modules/test_copy_format/test_copy_format.c
 create mode 100644 src/test/modules/test_copy_format/test_copy_format.control

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index cfca9d9dc29..8d94bc313eb 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -32,6 +32,7 @@
 #include "parser/parse_coerce.h"
 #include "parser/parse_collate.h"
 #include "parser/parse_expr.h"
+#include "parser/parse_func.h"
 #include "parser/parse_relation.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
@@ -476,6 +477,70 @@ defGetCopyLogVerbosityChoice(DefElem *def, ParseState *pstate)
 	return COPY_LOG_VERBOSITY_DEFAULT;	/* keep compiler quiet */
 }
 
+/*
+ * Process the "format" option of COPY.
+ *
+ * The built-in formats "text", "csv" and "binary" only set the csv_mode and
+ * binary flags in opts_out. Any other value is taken as the name of a handler
+ * function with a single "internal" argument; it is looked up only when
+ * is_from is false and must return copy_handler, and its OID is then saved in
+ * opts_out->handler. An unknown format or a wrong return type is an error.
+ */
+static void
+ProcessCopyOptionFormat(ParseState *pstate,
+						CopyFormatOptions *opts_out,
+						bool is_from,
+						DefElem *defel)
+{
+	char	   *format;
+	Oid			funcargtypes[1];
+	Oid			handlerOid = InvalidOid;
+
+	format = defGetString(defel);
+
+	opts_out->csv_mode = false;
+	opts_out->binary = false;
+	/* built-in formats: no handler lookup is needed */
+	if (strcmp(format, "text") == 0)
+	{
+		/* "csv_mode == false && binary == false" means "text" */
+		return;
+	}
+	else if (strcmp(format, "csv") == 0)
+	{
+		opts_out->csv_mode = true;
+		return;
+	}
+	else if (strcmp(format, "binary") == 0)
+	{
+		opts_out->binary = true;
+		return;
+	}
+
+	/* custom format: only COPY TO (is_from == false) looks up a handler */
+	if (!is_from)
+	{
+		funcargtypes[0] = INTERNALOID;
+		handlerOid = LookupFuncName(list_make1(makeString(format)), 1,
+									funcargtypes, true);
+	}
+	if (!OidIsValid(handlerOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("COPY format \"%s\" not recognized", format),
+				 parser_errposition(pstate, defel->location)));
+
+	/* the function found by name must be declared to return copy_handler */
+	if (get_func_rettype(handlerOid) != COPY_HANDLEROID)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("function %s must return type %s",
+						format, "copy_handler"),
+				 parser_errposition(pstate, defel->location)));
+
+	opts_out->handler = handlerOid;
+}
+
 /*
  * Process the statement option list for COPY.
  *
@@ -519,22 +584,10 @@ ProcessCopyOptions(ParseState *pstate,
 
 		if (strcmp(defel->defname, "format") == 0)
 		{
-			char	   *fmt = defGetString(defel);
-
 			if (format_specified)
 				errorConflictingDefElem(defel, pstate);
 			format_specified = true;
-			if (strcmp(fmt, "text") == 0)
-				 /* default format */ ;
-			else if (strcmp(fmt, "csv") == 0)
-				opts_out->csv_mode = true;
-			else if (strcmp(fmt, "binary") == 0)
-				opts_out->binary = true;
-			else
-				ereport(ERROR,
-						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-						 errmsg("COPY format \"%s\" not recognized", fmt),
-						 parser_errposition(pstate, defel->location)));
+			ProcessCopyOptionFormat(pstate, opts_out, is_from, defel);
 		}
 		else if (strcmp(defel->defname, "freeze") == 0)
 		{
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 721d29f8e53..0d33d101735 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -150,6 +150,7 @@ static void CopySendInt16(CopyToState cstate, int16 val);
 
 /* text format */
 static const CopyToRoutine CopyToRoutineText = {
+	.type = T_CopyToRoutine,
 	.CopyToStart = CopyToTextLikeStart,
 	.CopyToOutFunc = CopyToTextLikeOutFunc,
 	.CopyToOneRow = CopyToTextOneRow,
@@ -158,6 +159,7 @@ static const CopyToRoutine CopyToRoutineText = {
 
 /* CSV format */
 static const CopyToRoutine CopyToRoutineCSV = {
+	.type = T_CopyToRoutine,
 	.CopyToStart = CopyToTextLikeStart,
 	.CopyToOutFunc = CopyToTextLikeOutFunc,
 	.CopyToOneRow = CopyToCSVOneRow,
@@ -166,6 +168,7 @@ static const CopyToRoutine CopyToRoutineCSV = {
 
 /* binary format */
 static const CopyToRoutine CopyToRoutineBinary = {
+	.type = T_CopyToRoutine,
 	.CopyToStart = CopyToBinaryStart,
 	.CopyToOutFunc = CopyToBinaryOutFunc,
 	.CopyToOneRow = CopyToBinaryOneRow,
@@ -174,15 +177,32 @@ static const CopyToRoutine CopyToRoutineBinary = {
 
 /* Return a COPY TO routine for the given options */
 static const CopyToRoutine *
-CopyToGetRoutine(CopyFormatOptions opts)
+CopyToGetRoutine(CopyFormatOptions *opts)
 {
-	if (opts.csv_mode)
-		return &CopyToRoutineCSV;
-	else if (opts.binary)
-		return &CopyToRoutineBinary;
+	if (OidIsValid(opts->handler))
+	{
+		Datum		datum;
+		Node	   *routine;
 
-	/* default is text */
-	return &CopyToRoutineText;
+		/*
+		 * Custom format: call the handler with is_from = false (this is
+		 * COPY TO) and verify that it handed back a CopyToRoutine node.
+		 */
+		datum = OidFunctionCall1(opts->handler, BoolGetDatum(false));
+		routine = (Node *) DatumGetPointer(datum);
+		if (routine == NULL || !IsA(routine, CopyToRoutine))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("COPY handler function %u did not return CopyToRoutine struct",
+							opts->handler)));
+		return castNode(CopyToRoutine, routine);
+	}
+	else if (opts->csv_mode)
+		return &CopyToRoutineCSV;
+	else if (opts->binary)
+		return &CopyToRoutineBinary;
+	else
+		return &CopyToRoutineText;
 }
 
 /* Implementation of the start callback for text and CSV formats */
@@ -700,7 +720,7 @@ BeginCopyTo(ParseState *pstate,
 	ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
 
 	/* Set format routine */
-	cstate->routine = CopyToGetRoutine(cstate->opts);
+	cstate->routine = CopyToGetRoutine(&cstate->opts);
 
 	/* Process the source/target relation or query */
 	if (rel)
diff --git a/src/backend/nodes/Makefile b/src/backend/nodes/Makefile
index 77ddb9ca53f..dc6c1087361 100644
--- a/src/backend/nodes/Makefile
+++ b/src/backend/nodes/Makefile
@@ -50,6 +50,7 @@ node_headers = \
 	access/sdir.h \
 	access/tableam.h \
 	access/tsmapi.h \
+	commands/copyapi.h \
 	commands/event_trigger.h \
 	commands/trigger.h \
 	executor/tuptable.h \
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
old mode 100644
new mode 100755
index 1a657f7e0ae..fb90635a245
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -62,6 +62,7 @@ my @all_input_files = qw(
   access/sdir.h
   access/tableam.h
   access/tsmapi.h
+  commands/copyapi.h
   commands/event_trigger.h
   commands/trigger.h
   executor/tuptable.h
@@ -86,6 +87,7 @@ my @nodetag_only_files = qw(
   access/sdir.h
   access/tableam.h
   access/tsmapi.h
+  commands/copyapi.h
   commands/event_trigger.h
   commands/trigger.h
   executor/tuptable.h
diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c
index 317a1f2b282..f2ebc21ca56 100644
--- a/src/backend/utils/adt/pseudotypes.c
+++ b/src/backend/utils/adt/pseudotypes.c
@@ -370,6 +370,7 @@ PSEUDOTYPE_DUMMY_IO_FUNCS(fdw_handler);
 PSEUDOTYPE_DUMMY_IO_FUNCS(table_am_handler);
 PSEUDOTYPE_DUMMY_IO_FUNCS(index_am_handler);
 PSEUDOTYPE_DUMMY_IO_FUNCS(tsm_handler);
+PSEUDOTYPE_DUMMY_IO_FUNCS(copy_handler);
 PSEUDOTYPE_DUMMY_IO_FUNCS(internal);
 PSEUDOTYPE_DUMMY_IO_FUNCS(anyelement);
 PSEUDOTYPE_DUMMY_IO_FUNCS(anynonarray);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cd9422d0bac..9e7737168c4 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -7809,6 +7809,12 @@
 { oid => '3312', descr => 'I/O',
   proname => 'tsm_handler_out', prorettype => 'cstring',
   proargtypes => 'tsm_handler', prosrc => 'tsm_handler_out' },
+{ oid => '8753', descr => 'I/O',
+  proname => 'copy_handler_in', proisstrict => 'f', prorettype => 'copy_handler',
+  proargtypes => 'cstring', prosrc => 'copy_handler_in' },
+{ oid => '8754', descr => 'I/O',
+  proname => 'copy_handler_out', prorettype => 'cstring',
+  proargtypes => 'copy_handler', prosrc => 'copy_handler_out' },
 { oid => '267', descr => 'I/O',
   proname => 'table_am_handler_in', proisstrict => 'f',
   prorettype => 'table_am_handler', proargtypes => 'cstring',
diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat
index 6dca77e0a22..340e0cd0a8d 100644
--- a/src/include/catalog/pg_type.dat
+++ b/src/include/catalog/pg_type.dat
@@ -633,6 +633,12 @@
   typcategory => 'P', typinput => 'tsm_handler_in',
   typoutput => 'tsm_handler_out', typreceive => '-', typsend => '-',
   typalign => 'i' },
+{ oid => '8752',
+  descr => 'pseudo-type for the result of a copy to method function',
+  typname => 'copy_handler', typlen => '4', typbyval => 't', typtype => 'p',
+  typcategory => 'P', typinput => 'copy_handler_in',
+  typoutput => 'copy_handler_out', typreceive => '-', typsend => '-',
+  typalign => 'i' },
 { oid => '269',
   descr => 'pseudo-type for the result of a table AM handler function',
   typname => 'table_am_handler', typlen => '4', typbyval => 't', typtype => 'p',
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 06dfdfef721..332628d67cc 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -87,6 +87,7 @@ typedef struct CopyFormatOptions
 	CopyLogVerbosityChoice log_verbosity;	/* verbosity of logged messages */
 	int64		reject_limit;	/* maximum tolerable number of errors */
 	List	   *convert_select; /* list of column names (can be NIL) */
+	Oid			handler;		/* handler function for custom format routine */
 } CopyFormatOptions;
 
 /* These are private in commands/copy[from|to].c */
diff --git a/src/include/commands/copyapi.h b/src/include/commands/copyapi.h
index 2a2d2f9876b..4f4ffabf882 100644
--- a/src/include/commands/copyapi.h
+++ b/src/include/commands/copyapi.h
@@ -22,6 +22,8 @@
  */
 typedef struct CopyToRoutine
 {
+	NodeTag		type;
+
 	/*
 	 * Set output function information. This callback is called once at the
 	 * beginning of COPY TO.
diff --git a/src/include/nodes/meson.build b/src/include/nodes/meson.build
index d1ca24dd32f..96e70e7f38b 100644
--- a/src/include/nodes/meson.build
+++ b/src/include/nodes/meson.build
@@ -12,6 +12,7 @@ node_support_input_i = [
   'access/sdir.h',
   'access/tableam.h',
   'access/tsmapi.h',
+  'commands/copyapi.h',
   'commands/event_trigger.h',
   'commands/trigger.h',
   'executor/tuptable.h',
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index 4e4be3fa511..c9da440eed0 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -16,6 +16,7 @@ SUBDIRS = \
 		  spgist_name_ops \
 		  test_bloomfilter \
 		  test_copy_callbacks \
+		  test_copy_format \
 		  test_custom_rmgrs \
 		  test_ddl_deparse \
 		  test_dsa \
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index 2b057451473..d33bbbd4092 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -15,6 +15,7 @@ subdir('spgist_name_ops')
 subdir('ssl_passphrase_callback')
 subdir('test_bloomfilter')
 subdir('test_copy_callbacks')
+subdir('test_copy_format')
 subdir('test_custom_rmgrs')
 subdir('test_ddl_deparse')
 subdir('test_dsa')
diff --git a/src/test/modules/test_copy_format/.gitignore b/src/test/modules/test_copy_format/.gitignore
new file mode 100644
index 00000000000..5dcb3ff9723
--- /dev/null
+++ b/src/test/modules/test_copy_format/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/src/test/modules/test_copy_format/Makefile b/src/test/modules/test_copy_format/Makefile
new file mode 100644
index 00000000000..8497f91624d
--- /dev/null
+++ b/src/test/modules/test_copy_format/Makefile
@@ -0,0 +1,23 @@
+# src/test/modules/test_copy_format/Makefile
+
+MODULE_big = test_copy_format
+OBJS = \
+	$(WIN32RES) \
+	test_copy_format.o
+PGFILEDESC = "test_copy_format - test custom COPY FORMAT"
+
+EXTENSION = test_copy_format
+DATA = test_copy_format--1.0.sql
+
+REGRESS = test_copy_format
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/test_copy_format
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/test_copy_format/expected/test_copy_format.out b/src/test/modules/test_copy_format/expected/test_copy_format.out
new file mode 100644
index 00000000000..adfe7d1572a
--- /dev/null
+++ b/src/test/modules/test_copy_format/expected/test_copy_format.out
@@ -0,0 +1,17 @@
+CREATE EXTENSION test_copy_format;
+CREATE TABLE public.test (a smallint, b integer, c bigint);
+INSERT INTO public.test VALUES (1, 2, 3), (12, 34, 56), (123, 456, 789);
+COPY public.test FROM stdin WITH (FORMAT 'test_copy_format');
+ERROR:  COPY format "test_copy_format" not recognized
+LINE 1: COPY public.test FROM stdin WITH (FORMAT 'test_copy_format')...
+                                          ^
+COPY public.test TO stdout WITH (FORMAT 'test_copy_format');
+NOTICE:  test_copy_format: is_from=false
+NOTICE:  CopyToOutFunc: atttypid=21
+NOTICE:  CopyToOutFunc: atttypid=23
+NOTICE:  CopyToOutFunc: atttypid=20
+NOTICE:  CopyToStart: natts=3
+NOTICE:  CopyToOneRow: tts_nvalid=3
+NOTICE:  CopyToOneRow: tts_nvalid=3
+NOTICE:  CopyToOneRow: tts_nvalid=3
+NOTICE:  CopyToEnd
diff --git a/src/test/modules/test_copy_format/meson.build b/src/test/modules/test_copy_format/meson.build
new file mode 100644
index 00000000000..a45a2e0a039
--- /dev/null
+++ b/src/test/modules/test_copy_format/meson.build
@@ -0,0 +1,33 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+test_copy_format_sources = files(
+  'test_copy_format.c',
+)
+
+if host_system == 'windows'
+  test_copy_format_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'test_copy_format',
+    '--FILEDESC', 'test_copy_format - test custom COPY FORMAT',])
+endif
+
+test_copy_format = shared_module('test_copy_format',
+  test_copy_format_sources,
+  kwargs: pg_test_mod_args,
+)
+test_install_libs += test_copy_format
+
+test_install_data += files(
+  'test_copy_format.control',
+  'test_copy_format--1.0.sql',
+)
+
+tests += {
+  'name': 'test_copy_format',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'regress': {
+    'sql': [
+      'test_copy_format',
+    ],
+  },
+}
diff --git a/src/test/modules/test_copy_format/sql/test_copy_format.sql b/src/test/modules/test_copy_format/sql/test_copy_format.sql
new file mode 100644
index 00000000000..810b3d8cedc
--- /dev/null
+++ b/src/test/modules/test_copy_format/sql/test_copy_format.sql
@@ -0,0 +1,5 @@
+CREATE EXTENSION test_copy_format;
+CREATE TABLE public.test (a smallint, b integer, c bigint);
+INSERT INTO public.test VALUES (1, 2, 3), (12, 34, 56), (123, 456, 789);
+COPY public.test FROM stdin WITH (FORMAT 'test_copy_format');
+COPY public.test TO stdout WITH (FORMAT 'test_copy_format');
diff --git a/src/test/modules/test_copy_format/test_copy_format--1.0.sql b/src/test/modules/test_copy_format/test_copy_format--1.0.sql
new file mode 100644
index 00000000000..d24ea03ce99
--- /dev/null
+++ b/src/test/modules/test_copy_format/test_copy_format--1.0.sql
@@ -0,0 +1,8 @@
+/* src/test/modules/test_copy_format/test_copy_format--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION test_copy_format" to load this file. \quit
+
+CREATE FUNCTION test_copy_format(internal)
+	RETURNS copy_handler
+	AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/src/test/modules/test_copy_format/test_copy_format.c b/src/test/modules/test_copy_format/test_copy_format.c
new file mode 100644
index 00000000000..b42d472d851
--- /dev/null
+++ b/src/test/modules/test_copy_format/test_copy_format.c
@@ -0,0 +1,63 @@
+/*--------------------------------------------------------------------------
+ *
+ * test_copy_format.c
+ *		Code for testing custom COPY format.
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/test/modules/test_copy_format/test_copy_format.c
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "commands/copyapi.h"
+#include "commands/defrem.h"
+
+PG_MODULE_MAGIC;
+
+static void
+CopyToOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo)
+{
+	ereport(NOTICE, errmsg("CopyToOutFunc: atttypid=%u", atttypid));
+}
+
+static void
+CopyToStart(CopyToState cstate, TupleDesc tupDesc)
+{
+	ereport(NOTICE, errmsg("CopyToStart: natts=%d", tupDesc->natts));
+}
+
+static void
+CopyToOneRow(CopyToState cstate, TupleTableSlot *slot)
+{
+	ereport(NOTICE, errmsg("CopyToOneRow: tts_nvalid=%d", slot->tts_nvalid));
+}
+
+static void
+CopyToEnd(CopyToState cstate)
+{
+	ereport(NOTICE, errmsg("CopyToEnd"));
+}
+
+static const CopyToRoutine CopyToRoutineTestCopyFormat = {
+	.type = T_CopyToRoutine,
+	.CopyToOutFunc = CopyToOutFunc,
+	.CopyToStart = CopyToStart,
+	.CopyToOneRow = CopyToOneRow,
+	.CopyToEnd = CopyToEnd,
+};
+
+PG_FUNCTION_INFO_V1(test_copy_format);
+Datum
+test_copy_format(PG_FUNCTION_ARGS)
+{
+	/* Echo the is_from argument, then return our routine table. */
+	const char *is_from_str = PG_GETARG_BOOL(0) ? "true" : "false";
+
+	ereport(NOTICE, errmsg("test_copy_format: is_from=%s", is_from_str));
+
+	PG_RETURN_POINTER(&CopyToRoutineTestCopyFormat);
+}
diff --git a/src/test/modules/test_copy_format/test_copy_format.control b/src/test/modules/test_copy_format/test_copy_format.control
new file mode 100644
index 00000000000..f05a6362358
--- /dev/null
+++ b/src/test/modules/test_copy_format/test_copy_format.control
@@ -0,0 +1,4 @@
+comment = 'Test code for custom COPY format'
+default_version = '1.0'
+module_pathname = '$libdir/test_copy_format'
+relocatable = true
-- 
2.47.2

