From 1684e2394e5557f94c17e39b94351a76e601e00d Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Mon, 22 Jun 2026 09:21:51 -0700
Subject: [PATCH v2 2/4] Allow extensions to register custom format to COPY TO
 and COPY FROM.

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
---
 src/backend/commands/Makefile     |   1 +
 src/backend/commands/copy.c       |  97 ++++++++++++++++++++--
 src/backend/commands/copyapi.c    | 131 ++++++++++++++++++++++++++++++
 src/backend/commands/copyfrom.c   |   4 +-
 src/backend/commands/copyto.c     |   4 +-
 src/backend/commands/meson.build  |   1 +
 src/include/commands/copy.h       |  19 +++++
 src/include/commands/copy_state.h |   6 ++
 src/include/commands/copyapi.h    |  37 +++++++++
 src/tools/pgindent/typedefs.list  |   1 +
 10 files changed, 290 insertions(+), 11 deletions(-)
 create mode 100644 src/backend/commands/copyapi.c

diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile
index 5b9d084977e..17b7aa08b55 100644
--- a/src/backend/commands/Makefile
+++ b/src/backend/commands/Makefile
@@ -23,6 +23,7 @@ OBJS = \
 	constraint.o \
 	conversioncmds.o \
 	copy.o \
+	copyapi.o \
 	copyfrom.o \
 	copyfromparse.o \
 	copyto.o \
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 003b70852bb..2fdba026ee0 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -23,6 +23,7 @@
 #include "access/xact.h"
 #include "catalog/pg_authid.h"
 #include "commands/copy.h"
+#include "commands/copyapi.h"
 #include "commands/defrem.h"
 #include "executor/executor.h"
 #include "mb/pg_wchar.h"
@@ -592,6 +593,14 @@ ProcessCopyOptions(ParseState *pstate,
 	bool		force_array_specified = false;
 	ListCell   *option;
 
+	/*
+	 * Options not recognized by core are collected here and, once the format
+	 * is known, either handed to a custom format's option parser or rejected.
+	 */
+	List	   *deferred_options = NIL;
+	ProcessOneOptionFn custom_process_option_fn = NULL;
+	char	   *custom_format_name = NULL;
+
 	/* Support external use for option sanity checking */
 	if (opts_out == NULL)
 		opts_out = palloc0_object(CopyFormatOptions);
@@ -620,6 +629,13 @@ ProcessCopyOptions(ParseState *pstate,
 				opts_out->format = COPY_FORMAT_BINARY;
 			else if (strcmp(fmt, "json") == 0)
 				opts_out->format = COPY_FORMAT_JSON;
+			else if (GetCopyCustomFormatRoutines(fmt, &opts_out->to_routine,
+												 &opts_out->from_routine,
+												 &custom_process_option_fn))
+			{
+				opts_out->format = COPY_FORMAT_CUSTOM;
+				custom_format_name = fmt;
+			}
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -775,11 +791,54 @@ ProcessCopyOptions(ParseState *pstate,
 			opts_out->reject_limit = defGetCopyRejectLimitOption(defel);
 		}
 		else
+		{
+			/*
+			 * Not a core option.  Defer the check to after the loop as it may
+			 * belong to a custom format whose "format" option has not been
+			 * seen yet.
+			 */
+			deferred_options = lappend(deferred_options, defel);
+		}
+	}
+
+	/*
+	 * Now that the format and every option have been seen, resolve the
+	 * deferred options.
+	 */
+	if (deferred_options != NIL)
+	{
+		/*
+		 * A custom format's callback gets them; for any built-in (including
+		 * the default), or a custom format with no callback, an unrecognized
+		 * option is an error, preserving the historical behavior relied on by
+		 * external callers such as file_fdw.
+		 */
+		if (opts_out->format != COPY_FORMAT_CUSTOM || custom_process_option_fn == NULL)
+		{
+			DefElem    *defel = linitial_node(DefElem, deferred_options);
+
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("option \"%s\" not recognized",
 							defel->defname),
 					 parser_errposition(pstate, defel->location)));
+		}
+
+		/*
+		 * Hand each option core did not recognize to the format's per-option
+		 * callback. Anything the format does not claim (or any option at all
+		 * if it has no callback) is an error, so an unrecognized option
+		 * always fails here.
+		 */
+		foreach_node(DefElem, opt, deferred_options)
+		{
+			if (!custom_process_option_fn(opts_out, is_from, opt))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("COPY format \"%s\" does not accept option \"%s\"",
+								custom_format_name, opt->defname),
+						 parser_errposition(pstate, opt->location)));
+		}
 	}
 
 	/*
@@ -869,7 +928,7 @@ ProcessCopyOptions(ParseState *pstate,
 	 * future-proofing.  Likewise we disallow all digits though only octal
 	 * digits are actually dangerous.
 	 */
-	if (opts_out->format != COPY_FORMAT_CSV &&
+	if (CopyFormatBuiltins(opts_out->format) && opts_out->format != COPY_FORMAT_CSV &&
 		strchr("\\.abcdefghijklmnopqrstuvwxyz0123456789",
 			   opts_out->delim[0]) != NULL)
 		ereport(ERROR,
@@ -888,7 +947,8 @@ ProcessCopyOptions(ParseState *pstate,
 				: errmsg("cannot specify %s in JSON mode", "HEADER"));
 
 	/* Check quote */
-	if (opts_out->format != COPY_FORMAT_CSV && opts_out->quote != NULL)
+	if (CopyFormatBuiltins(opts_out->format) && opts_out->format != COPY_FORMAT_CSV &&
+		opts_out->quote != NULL)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
@@ -905,7 +965,8 @@ ProcessCopyOptions(ParseState *pstate,
 				 errmsg("COPY delimiter and quote must be different")));
 
 	/* Check escape */
-	if (opts_out->format != COPY_FORMAT_CSV && opts_out->escape != NULL)
+	if (CopyFormatBuiltins(opts_out->format) && opts_out->format != COPY_FORMAT_CSV &&
+		opts_out->escape != NULL)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
@@ -917,7 +978,8 @@ ProcessCopyOptions(ParseState *pstate,
 				 errmsg("COPY escape must be a single one-byte character")));
 
 	/* Check force_quote */
-	if (opts_out->format != COPY_FORMAT_CSV && (opts_out->force_quote || opts_out->force_quote_all))
+	if (CopyFormatBuiltins(opts_out->format) && opts_out->format != COPY_FORMAT_CSV &&
+		(opts_out->force_quote || opts_out->force_quote_all))
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
@@ -931,8 +993,8 @@ ProcessCopyOptions(ParseState *pstate,
 						"COPY FROM")));
 
 	/* Check force_notnull */
-	if (opts_out->format != COPY_FORMAT_CSV && (opts_out->force_notnull != NIL ||
-												opts_out->force_notnull_all))
+	if (CopyFormatBuiltins(opts_out->format) && opts_out->format != COPY_FORMAT_CSV &&
+		(opts_out->force_notnull != NIL || opts_out->force_notnull_all))
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
@@ -947,8 +1009,8 @@ ProcessCopyOptions(ParseState *pstate,
 						"COPY TO")));
 
 	/* Check force_null */
-	if (opts_out->format != COPY_FORMAT_CSV && (opts_out->force_null != NIL ||
-												opts_out->force_null_all))
+	if (CopyFormatBuiltins(opts_out->format) && opts_out->format != COPY_FORMAT_CSV &&
+		(opts_out->force_null != NIL || opts_out->force_null_all))
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
@@ -995,7 +1057,8 @@ ProcessCopyOptions(ParseState *pstate,
 				errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				errmsg("COPY %s is not supported for %s", "FORMAT JSON", "COPY FROM"));
 
-	if (opts_out->format != COPY_FORMAT_JSON && opts_out->force_array)
+	if (CopyFormatBuiltins(opts_out->format) && opts_out->format != COPY_FORMAT_JSON &&
+		opts_out->force_array)
 		ereport(ERROR,
 				errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				errmsg("COPY %s can only be used with JSON mode", "FORCE_ARRAY"));
@@ -1048,6 +1111,22 @@ ProcessCopyOptions(ParseState *pstate,
 		 * ON_ERROR, third is the value of the COPY option, e.g. IGNORE */
 				 errmsg("COPY %s requires %s to be set to %s",
 						"REJECT_LIMIT", "ON_ERROR", "IGNORE")));
+
+	/* Check custom format routines */
+	if (opts_out->format == COPY_FORMAT_CUSTOM)
+	{
+		if (is_from && opts_out->from_routine == NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("COPY format \"%s\" cannot be used with COPY FROM",
+							custom_format_name)));
+
+		if (!is_from && opts_out->to_routine == NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("COPY format \"%s\" cannot be used with COPY TO",
+							custom_format_name)));
+	}
 }
 
 /*
diff --git a/src/backend/commands/copyapi.c b/src/backend/commands/copyapi.c
new file mode 100644
index 00000000000..168efbcf30b
--- /dev/null
+++ b/src/backend/commands/copyapi.c
@@ -0,0 +1,131 @@
+/*-------------------------------------------------------------------------
+ *
+ * copyapi.c
+ *	  Registry for pluggable COPY TO/FROM format handlers.
+ *
+ * The built-in formats (text, csv, binary, json) are dispatched directly by
+ * the COPY engine. Extensions can provide additional formats by registering
+ * a CopyToRoutine and/or CopyFromRoutine under a name from their _PG_init();
+ * ProcessCopyOptions() then resolves "COPY ... (FORMAT 'name')" against this
+ * registry.
+ *
+ * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/commands/copyapi.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "commands/copyapi.h"
+#include "utils/memutils.h"
+
+typedef struct CopyCustomFormatEntry
+{
+	const char *name;			/* constant string; never freed (see below) */
+	const CopyToRoutine *to_routine;	/* COPY TO callbacks, or NULL */
+	const CopyFromRoutine *from_routine;	/* COPY FROM callbacks, or NULL */
+	ProcessOneOptionFn option_fn;	/* per-option callback, or NULL */
+} CopyCustomFormatEntry;
+
+static CopyCustomFormatEntry *CopyCustomFormatArray = NULL;
+static int	CopyCustomFormatsAssigned = 0;
+static int	CopyCustomFormatsAllocated = 0;
+
+/* Is 'name' exactly (case-sensitively) a built-in COPY format keyword? */
+static bool
+is_builtin_copy_format(const char *name)
+{
+	return (strcmp(name, "text") == 0 ||
+			strcmp(name, "csv") == 0 ||
+			strcmp(name, "binary") == 0 ||
+			strcmp(name, "json") == 0);
+}
+
+/*
+ * Register a custom COPY format. Intended to be called from an extension's
+ * _PG_init(). Either routine may be NULL if the format does not support that
+ * direction (but not both); 'option_fn' may be NULL if the format takes no
+ * format-specific options. Raises an ERROR if 'name' is a built-in format
+ * name or is already registered.
+ *
+ * 'name' is assumed to be a constant string or allocated in storage that will
+ * never be freed; it is stored by reference.
+ */
+void
+RegisterCopyCustomFormat(const char *name, const CopyToRoutine *to,
+						 const CopyFromRoutine *from, ProcessOneOptionFn option_fn)
+{
+	Assert(name != NULL && name[0] != '\0');
+
+	/* Must support at least one direction (checked by assertion only) */
+	Assert(to != NULL || from != NULL);
+
+	Assert(to == NULL ||
+		   (to->CopyToStart != NULL && to->CopyToOneRow != NULL &&
+			to->CopyToEnd != NULL));
+	Assert(from == NULL ||
+		   (from->CopyFromStart != NULL && from->CopyFromOneRow != NULL &&
+			from->CopyFromEnd != NULL));
+
+	/* Built-in format names are reserved and cannot be registered */
+	if (is_builtin_copy_format(name))
+		elog(ERROR, "COPY format \"%s\" is a built-in format name", name);
+
+	/* Reject a duplicate registration (exact, case-sensitive name match). */
+	for (int i = 0; i < CopyCustomFormatsAssigned; i++)
+	{
+		if (strcmp(CopyCustomFormatArray[i].name, name) == 0)
+			elog(ERROR, "COPY format \"%s\" is already registered", name);
+	}
+
+	/* Create the array on first use; it must outlive the current context. */
+	if (CopyCustomFormatArray == NULL)
+	{
+		CopyCustomFormatsAllocated = 16;
+		CopyCustomFormatArray = (CopyCustomFormatEntry *)
+			MemoryContextAlloc(TopMemoryContext,
+							   CopyCustomFormatsAllocated * sizeof(CopyCustomFormatEntry));
+	}
+
+	/* Double the capacity once every allocated slot is in use. */
+	if (CopyCustomFormatsAssigned >= CopyCustomFormatsAllocated)
+	{
+		CopyCustomFormatsAllocated *= 2;
+		CopyCustomFormatArray = (CopyCustomFormatEntry *)
+			repalloc_array(CopyCustomFormatArray, CopyCustomFormatEntry, CopyCustomFormatsAllocated);
+	}
+
+	CopyCustomFormatArray[CopyCustomFormatsAssigned].name = name;
+	CopyCustomFormatArray[CopyCustomFormatsAssigned].to_routine = to;
+	CopyCustomFormatArray[CopyCustomFormatsAssigned].from_routine = from;
+	CopyCustomFormatArray[CopyCustomFormatsAssigned].option_fn = option_fn;
+	CopyCustomFormatsAssigned++;
+}
+
+/*
+ * Look up a registered custom format by name. Returns true after filling the
+ * non-NULL out-parameters, or false (touching none of them) if not found.
+ */
+bool
+GetCopyCustomFormatRoutines(const char *name, const CopyToRoutine **to,
+							const CopyFromRoutine **from, ProcessOneOptionFn * option_fn)
+{
+	for (int i = 0; i < CopyCustomFormatsAssigned; i++)
+	{
+		if (strcmp(CopyCustomFormatArray[i].name, name) == 0)
+		{
+			if (to)
+				*to = CopyCustomFormatArray[i].to_routine;
+			if (from)
+				*from = CopyCustomFormatArray[i].from_routine;
+			if (option_fn)
+				*option_fn = CopyCustomFormatArray[i].option_fn;
+
+			return true;
+		}
+	}
+	return false;
+}
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 2c57b32f4de..69ec94c9ec1 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -158,7 +158,9 @@ static const CopyFromRoutine CopyFromRoutineBinary = {
 static const CopyFromRoutine *
 CopyFromGetRoutine(const CopyFormatOptions *opts)
 {
-	if (opts->format == COPY_FORMAT_CSV)
+	if (opts->format == COPY_FORMAT_CUSTOM)
+		return opts->from_routine;
+	else if (opts->format == COPY_FORMAT_CSV)
 		return &CopyFromRoutineCSV;
 	else if (opts->format == COPY_FORMAT_BINARY)
 		return &CopyFromRoutineBinary;
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index ef2038c9a5d..f897f23737f 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -130,7 +130,9 @@ static const CopyToRoutine CopyToRoutineBinary = {
 static const CopyToRoutine *
 CopyToGetRoutine(const CopyFormatOptions *opts)
 {
-	if (opts->format == COPY_FORMAT_CSV)
+	if (opts->format == COPY_FORMAT_CUSTOM)
+		return opts->to_routine;
+	else if (opts->format == COPY_FORMAT_CSV)
 		return &CopyToRoutineCSV;
 	else if (opts->format == COPY_FORMAT_BINARY)
 		return &CopyToRoutineBinary;
diff --git a/src/backend/commands/meson.build b/src/backend/commands/meson.build
index 9f258d566eb..d98273da67e 100644
--- a/src/backend/commands/meson.build
+++ b/src/backend/commands/meson.build
@@ -11,6 +11,7 @@ backend_sources += files(
   'constraint.c',
   'conversioncmds.c',
   'copy.c',
+  'copyapi.c',
   'copyfrom.c',
   'copyfromparse.c',
   'copyto.c',
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 5e710efff5b..9c40ca4ba09 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -58,7 +58,16 @@ typedef enum CopyFormat
 	COPY_FORMAT_BINARY,
 	COPY_FORMAT_CSV,
 	COPY_FORMAT_JSON,
+	COPY_FORMAT_CUSTOM,			/* format provided by an extension */
 } CopyFormat;
+#define CopyFormatBuiltins(format) ((format) != COPY_FORMAT_CUSTOM)
+
+/*
+ * Full definitions live in commands/copyapi.h, which includes this header;
+ * CopyFormatOptions only needs to hold pointers to the resolved routines.
+ */
+struct CopyToRoutine;
+struct CopyFromRoutine;
 
 /*
  * A struct to hold COPY options, in a parsed form. All of these are related
@@ -97,6 +106,16 @@ typedef struct CopyFormatOptions
 	CopyLogVerbosityChoice log_verbosity;	/* verbosity of logged messages */
 	int64		reject_limit;	/* maximum tolerable number of errors */
 	List	   *convert_select; /* list of column names (can be NIL) */
+
+	/*
+	 * Resolved handler for a custom format. The direction not in use may be
+	 * NULL. For built-in formats these are unused.
+	 */
+	const struct CopyToRoutine *to_routine;
+	const struct CopyFromRoutine *from_routine;
+
+	/* Custom format private option data */
+	void	   *format_private_opts;
 } CopyFormatOptions;
 
 /* These are defined in copy_state.h */
diff --git a/src/include/commands/copy_state.h b/src/include/commands/copy_state.h
index 52cbf5067eb..6c5defbf4ee 100644
--- a/src/include/commands/copy_state.h
+++ b/src/include/commands/copy_state.h
@@ -178,6 +178,9 @@ typedef struct CopyFromStateData
 #define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
 
 	uint64		bytes_processed;	/* number of bytes processed so far */
+
+	/* Custom format private data to store the state */
+	void	   *format_private;
 } CopyFromStateData;
 
 /*
@@ -248,6 +251,9 @@ typedef struct CopyToStateData
 	FmgrInfo   *out_functions;	/* lookup info for output functions */
 	MemoryContext rowcontext;	/* per-row evaluation context */
 	uint64		bytes_processed;	/* number of bytes processed so far */
+
+	/* Custom format private data to store the state */
+	void	   *format_private;
 } CopyToStateData;
 
 #endif							/* COPY_STATE_H */
diff --git a/src/include/commands/copyapi.h b/src/include/commands/copyapi.h
index 398e7a78bb3..8eb5fe9c7dc 100644
--- a/src/include/commands/copyapi.h
+++ b/src/include/commands/copyapi.h
@@ -14,6 +14,7 @@
 #ifndef COPYAPI_H
 #define COPYAPI_H
 
+#include "commands/copy_state.h"
 #include "commands/copy.h"
 
 /*
@@ -102,4 +103,40 @@ typedef struct CopyFromRoutine
 	void		(*CopyFromEnd) (CopyFromState cstate);
 } CopyFromRoutine;
 
+/*
+ * Optional callback to process one format-specific COPY option. Invoked
+ * from ProcessCopyOptions() once per option that core did not recognize, after
+ * every core option has been parsed (so 'opts' is fully populated).
+ *
+ * Returns true if the option belongs to the format and is valid. Returns false
+ * if the option is not one the format recognizes, in which case core raises the
+ * "not accepted" error; thus an unrecognized option always errors, whether or
+ * not the format supplies this callback. For a recognized option with an invalid
+ * value, the callback should ereport() itself.
+ *
+ * Note that the callback does not receive the ParseState, so errors it
+ * raises itself cannot use parser_errposition(); core's rejection does.
+ */
+typedef bool (*ProcessOneOptionFn) (CopyFormatOptions *opts, bool is_from,
+									DefElem *option);
+
+/*
+ * Register a COPY format under 'name', mapping it to its TO and/or FROM
+ * routines and an optional per-option callback. Intended to be called
+ * from an extension's _PG_init(). Either routine may be NULL if the format
+ * does not support that direction (but not both). Errors if 'name' collides
+ * with a built-in format or one already registered.
+ */
+extern void RegisterCopyCustomFormat(const char *name, const CopyToRoutine *to,
+									 const CopyFromRoutine *from,
+									 ProcessOneOptionFn option_fn);
+
+/*
+ * Look up a previously registered custom format. Returns false if 'name' is
+ * not registered. Out-parameters may be NULL if not wanted.
+ */
+extern bool GetCopyCustomFormatRoutines(const char *name, const CopyToRoutine **to,
+										const CopyFromRoutine **from,
+										ProcessOneOptionFn * option_fn);
+
 #endif							/* COPYAPI_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 1969d467c1d..5263710e451 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -540,6 +540,7 @@ ConvProcInfo
 ConversionLocation
 ConvertRowtypeExpr
 CookedConstraint
+CopyCustomFormatEntry
 CopyDest
 CopyFormat
 CopyFormatOptions
-- 
2.54.0

