From 2908562525ef529d5a4064b067b21e114e3a42a0 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Fri, 13 Mar 2026 11:06:21 -0400
Subject: [PATCH 03/12] Global temporary tables: ON COMMIT DELETE ROWS support

Add support for the ON COMMIT DELETE ROWS clause on global temporary
tables.  The on-commit action is persisted as a reloption
(on_commit_delete) in the catalog so that all sessions can discover it
when they first access the GTT.

Because GTTs use local buffers, relation_open sets
XACT_FLAGS_ACCESSEDTEMPNAMESPACE for them the same way it does for
regular temp tables.  We nevertheless do not rely on the existing
PreCommit_on_commit_actions() path (iterating the on_commits list
populated by register_on_commit_action in heap_create_with_catalog)
to truncate the session-local files at commit: heap_truncate would
take AccessExclusiveLock at every commit and block on peers' ordinary
transaction-level locks, even though only this session's private
storage is affected.  heap_create_with_catalog therefore skips the
generic registration for GTTs, and the new PreCommit_gtt_on_commit()
truncates the per-session files directly through smgr.

GttInitSessionStorage refreshes entry->on_commit_delete from
rd_options on every call.  rd_options is not populated on the very
first invocation from heap_create_with_catalog (the reloption has
not been written to pg_class yet), so the flag is not usable until
the subsequent CCI-driven relcache rebuild.  PreCommit_gtt_on_commit(),
introduced here, consults the flag: it walks the per-session map and
truncates every entry that has on_commit_delete set, has had its
storage created, and has a non-empty main fork.  It does nothing
unless XACT_FLAGS_ACCESSEDTEMPNAMESPACE is set for the transaction.
We hook it into CommitTransaction, right after
PreCommit_on_commit_actions; PrepareTransaction is deliberately left
alone, as explained below.

The on_commit_delete reloption is internal: DefineRelation and
ATExecSetRelOptions reject attempts to set or reset it directly from
user SQL.  The reloption is only added implicitly when ON COMMIT
DELETE ROWS is specified on CREATE of a GTT.

TOAST storage is reclaimed along with the heap: each heap entry in the
per-session map records its toast relation's OID (captured from the
relcache), and the commit-time truncation queues the toast heap as
well, without any catalog access from the commit-time hook.

PrepareTransaction does not call PreCommit_gtt_on_commit: any GTT
access sets XACT_FLAGS_ACCESSEDTEMPNAMESPACE, which makes the PREPARE
fail shortly afterwards, so commit-time truncation work there would
always be wasted.
---
 src/backend/access/common/reloptions.c |  13 +-
 src/backend/access/transam/xact.c      |  10 ++
 src/backend/catalog/heap.c             |  10 +-
 src/backend/catalog/storage_gtt.c      | 189 +++++++++++++++++++++++++
 src/backend/commands/tablecmds.c       |  42 ++++++
 src/include/catalog/storage_gtt.h      |   1 +
 src/include/utils/rel.h                |   1 +
 7 files changed, 263 insertions(+), 3 deletions(-)

diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 3e832c3797e..7d452c5e129 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -162,6 +162,15 @@ static relopt_bool boolRelOpts[] =
 		},
 		true
 	},
+	{
+		{
+			"on_commit_delete",
+			"Truncate global temporary table data on commit",
+			RELOPT_KIND_HEAP,
+			AccessExclusiveLock
+		},
+		false
+	},
 	/* list terminator */
 	{{NULL}}
 };
@@ -2025,7 +2034,9 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
 		{"vacuum_truncate", RELOPT_TYPE_TERNARY,
 		offsetof(StdRdOptions, vacuum_truncate)},
 		{"vacuum_max_eager_freeze_failure_rate", RELOPT_TYPE_REAL,
-		offsetof(StdRdOptions, vacuum_max_eager_freeze_failure_rate)}
+		offsetof(StdRdOptions, vacuum_max_eager_freeze_failure_rate)},
+		{"on_commit_delete", RELOPT_TYPE_BOOL,
+		offsetof(StdRdOptions, on_commit_delete)}
 	};
 
 	return (bytea *) build_reloptions(reloptions, validate, kind,
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 5586fbe5b07..4efb6a33f03 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -36,6 +36,7 @@
 #include "catalog/namespace.h"
 #include "catalog/pg_enum.h"
 #include "catalog/storage.h"
+#include "catalog/storage_gtt.h"
 #include "commands/async.h"
 #include "commands/tablecmds.h"
 #include "commands/trigger.h"
@@ -2352,6 +2353,9 @@ CommitTransaction(void)
 	 */
 	PreCommit_on_commit_actions();
 
+	/* Truncate ON COMMIT DELETE ROWS global temporary tables */
+	PreCommit_gtt_on_commit();
+
 	/*
 	 * Synchronize files that are created and not WAL-logged during this
 	 * transaction. This must happen before AtEOXact_RelationMap(), so that we
@@ -2614,6 +2618,12 @@ PrepareTransaction(void)
 	 */
 	PreCommit_on_commit_actions();
 
+	/*
+	 * No PreCommit_gtt_on_commit() here: any access to a GTT sets
+	 * XACT_FLAGS_ACCESSEDTEMPNAMESPACE, which makes the PREPARE fail just
+	 * below, so commit-time GTT truncation work would always be wasted.
+	 */
+
 	/*
 	 * Synchronize files that are created and not WAL-logged during this
 	 * transaction. This must happen before EndPrepare(), so that we don't see
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 0811dfed681..27b3f7e72be 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1544,9 +1544,15 @@ heap_create_with_catalog(const char *relname,
 	StoreConstraints(new_rel_desc, cooked_constraints, is_internal);
 
 	/*
-	 * If there's a special on-commit action, remember it
+	 * If there's a special on-commit action, remember it.  Global temporary
+	 * tables manage their ON COMMIT DELETE ROWS truncation through
+	 * PreCommit_gtt_on_commit instead, since heap_truncate would escalate to
+	 * AccessExclusiveLock at every commit, blocking on peers' ordinary
+	 * transaction-level locks even though only this session's private storage
+	 * is affected; skip the generic registration here for GTTs.
 	 */
-	if (oncommit != ONCOMMIT_NOOP)
+	if (oncommit != ONCOMMIT_NOOP &&
+		relpersistence != RELPERSISTENCE_GLOBAL_TEMP)
 		register_on_commit_action(relid, oncommit);
 
 	/*
diff --git a/src/backend/catalog/storage_gtt.c b/src/backend/catalog/storage_gtt.c
index 427e3e111e0..e16310a0329 100644
--- a/src/backend/catalog/storage_gtt.c
+++ b/src/backend/catalog/storage_gtt.c
@@ -21,8 +21,10 @@
  */
 #include "postgres.h"
 
+#include "access/table.h"
 #include "access/tableam.h"
 #include "access/xact.h"
+#include "catalog/heap.h"
 #include "catalog/pg_tablespace_d.h"
 #include "catalog/storage.h"
 #include "catalog/storage_gtt.h"
@@ -55,8 +57,11 @@
 typedef struct GttStorageEntry
 {
 	Oid			relid;			/* GTT's pg_class OID (hash key) */
+	Oid			toast_relid;	/* toast relation for heap entries, InvalidOid
+								 * if none / not a heap */
 	RelFileLocator locator;		/* per-session physical storage location */
 	bool		storage_created;	/* has smgr file been created? */
+	bool		on_commit_delete;	/* truncate data on commit? */
 	bool		drop_pending;	/* entry scheduled for drop at xact commit */
 	SubTransactionId create_subid;	/* subxact that added this entry */
 	SubTransactionId storage_subid; /* subxact that created current storage */
@@ -86,6 +91,7 @@ static void gtt_remove_entry(GttStorageEntry *entry);
 static void gtt_revert_storage(GttStorageEntry *entry);
 static void gtt_remove_relids(List *to_remove);
 static void gtt_init_entry(GttStorageEntry *entry, Relation relation);
+static void gtt_truncate_smgr(GttStorageEntry *entry);
 
 /*
  * ensure_gtt_hash
@@ -148,6 +154,41 @@ GttInitSessionStorage(Relation relation)
 	if (!found)
 		gtt_init_entry(entry, relation);
 
+	/*
+	 * Refresh on_commit_delete from the catalog reloption.  rd_options is not
+	 * populated on the very first call from heap_create, so the CREATE path
+	 * initially leaves this flag cleared; a subsequent relcache build (after
+	 * CCI during the same CREATE) supplies the reloption.
+	 *
+	 * The truncation itself is done from PreCommit_gtt_on_commit -- we do not
+	 * register an OnCommitItem because heap_truncate's AccessExclusiveLock
+	 * would conflict with the locks peer sessions hold on the same GTT, even
+	 * though only this session's private storage is affected.
+	 */
+	if (relation->rd_options != NULL &&
+		relation->rd_rel->relkind == RELKIND_RELATION)
+	{
+		/*
+		 * The relkind check matters: rd_options is only StdRdOptions for
+		 * plain tables -- for other relkinds it can be a smaller
+		 * kind-specific struct, and reading on_commit_delete from it would
+		 * run off the end of the allocation.
+		 */
+		StdRdOptions *opts = (StdRdOptions *) relation->rd_options;
+
+		entry->on_commit_delete = opts->on_commit_delete;
+	}
+
+	/*
+	 * Remember the toast relation for heap entries, so the commit-time
+	 * on-commit-delete truncation can reach it without catalog access.  As
+	 * with on_commit_delete, rd_rel is not fully populated on the very first
+	 * call during CREATE; later relcache builds fill it in.
+	 */
+	if (relation->rd_rel->relkind == RELKIND_RELATION &&
+		OidIsValid(relation->rd_rel->reltoastrelid))
+		entry->toast_relid = relation->rd_rel->reltoastrelid;
+
 	/* Point the relation at our per-session storage */
 	relation->rd_locator = entry->locator;
 	relation->rd_backend = ProcNumberForTempRelations();
@@ -194,6 +235,8 @@ gtt_init_entry(GttStorageEntry *entry, Relation relation)
 	entry->create_subid = GetCurrentSubTransactionId();
 	gtt_xact_state_dirty = true;
 	entry->storage_subid = InvalidSubTransactionId;
+	entry->on_commit_delete = false;
+	entry->toast_relid = InvalidOid;
 }
 
 /*
@@ -497,6 +540,152 @@ gtt_subxact_callback(SubXactEvent event,
 	list_free(to_invalidate);
 }
 
+/*
+ * gtt_truncate_smgr
+ *		Truncate one entry's per-session storage to zero blocks via smgr.
+ *
+ * Covers the main fork plus the FSM and visibility map forks if they exist;
+ * does nothing if storage was never created or the main fork is missing.
+ *
+ * We cannot call RelationTruncate (which requires a Relation): opening
+ * relations during commit-time hooks corrupts the relcache state that
+ * subsequent xacts rely on for DROP TABLE.  smgr suffices: the storage is
+ * per-session and invisible to other backends, so the AccessExclusiveLock
+ * RelationTruncate documents and its relcache inval message are not needed.
+ *
+ * btree's _bt_getroot fast path caches the metapage in rd_amcache; that
+ * cache is dropped lazily by GttBuildIndexIfNeeded (added in a later
+ * commit) the next time the index is opened, so we do not touch it here.
+ */
+static void
+gtt_truncate_smgr(GttStorageEntry *entry)
+{
+	SMgrRelation reln;
+	ForkNumber	forks[MAX_FORKNUM + 1];
+	BlockNumber old_blocks[MAX_FORKNUM + 1];
+	BlockNumber new_blocks[MAX_FORKNUM + 1];
+	int			nforks = 0;
+
+	if (!entry->storage_created)
+		return;
+
+	reln = smgropen(entry->locator, ProcNumberForTempRelations());
+
+	/* tolerate an already-vanished file (defense in depth) */
+	if (!smgrexists(reln, MAIN_FORKNUM))
+		return;
+
+	forks[nforks] = MAIN_FORKNUM;
+	old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM);
+	new_blocks[nforks] = 0;
+	nforks++;
+
+	if (smgrexists(reln, FSM_FORKNUM))
+	{
+		forks[nforks] = FSM_FORKNUM;
+		old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM);
+		new_blocks[nforks] = 0;
+		nforks++;
+	}
+	if (smgrexists(reln, VISIBILITYMAP_FORKNUM))
+	{
+		forks[nforks] = VISIBILITYMAP_FORKNUM;
+		old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
+		new_blocks[nforks] = 0;
+		nforks++;
+	}
+
+	/*
+	 * Drop trailing empty forks from the request, and skip the truncation
+	 * entirely if every fork is already empty: there is then nothing in the
+	 * local buffer pool for this relation either, so smgrtruncate's
+	 * buffer-drop pass and sinval message would be pure overhead.  This
+	 * matters for toast relations, which PreCommit_gtt_on_commit hands us
+	 * whenever their heap was written, toasted values or not.
+	 */
+	while (nforks > 0 && old_blocks[nforks - 1] == 0)
+		nforks--;
+	if (nforks == 0)
+		return;
+
+	smgrtruncate(reln, forks, nforks, old_blocks, new_blocks);
+}
+
+/*
+ * PreCommit_gtt_on_commit
+ *		Truncate ON COMMIT DELETE ROWS GTTs at commit.
+ *
+ * Generic on-commit truncation in PreCommit_on_commit_actions cannot be
+ * used for GTTs: heap_truncate's AccessExclusiveLock would block on peers'
+ * ordinary transaction-level locks at every commit, and opening the relation
+ * via table_open at commit-time -- even with NoLock -- destabilises the
+ * relcache enough to break a subsequent DROP TABLE in the next xact.  So
+ * we register no OnCommitItem for GTTs (heap_create_with_catalog
+ * suppresses the generic registration; see register_on_commit_action()
+ * callers in heap.c) and truncate this session's local storage here
+ * directly through smgr, using the per-session locator that
+ * GttInitSessionStorage already recorded in our hash.
+ */
+void
+PreCommit_gtt_on_commit(void)
+{
+	HASH_SEQ_STATUS status;
+	GttStorageEntry *entry;
+	List	   *toast_relids = NIL;
+	ListCell   *lc;
+
+	if (gtt_storage_hash == NULL)
+		return;
+
+	/*
+	 * Match PreCommit_on_commit_actions's optimisation: skip when no temp
+	 * namespace was accessed in this xact, since any GTT we have storage for
+	 * is necessarily empty.
+	 */
+	if (!(MyXactFlags & XACT_FLAGS_ACCESSEDTEMPNAMESPACE))
+		return;
+
+	hash_seq_init(&status, gtt_storage_hash);
+	while ((entry = (GttStorageEntry *) hash_seq_search(&status)) != NULL)
+	{
+		if (!entry->on_commit_delete || !entry->storage_created)
+			continue;
+
+		/*
+		 * A heap whose main fork is already empty has not been written since
+		 * its last truncation; skip it -- and thereby its toast -- so an idle
+		 * table costs each commit only this block-count probe.
+		 * NOTE(review): unlike gtt_truncate_smgr, the probe does not check
+		 * smgrexists first -- confirm the file cannot have vanished here.
+		 */
+		if (smgrnblocks(smgropen(entry->locator, ProcNumberForTempRelations()),
+						MAIN_FORKNUM) == 0)
+			continue;
+
+		gtt_truncate_smgr(entry);
+
+		/*
+		 * Queue the toast relation too (if this session ever wrote toasted
+		 * values, an entry for it exists).  Truncating just the heap would
+		 * orphan the toast rows for good: nothing else ever deletes them, and
+		 * autovacuum never visits GTTs.
+		 */
+		if (OidIsValid(entry->toast_relid))
+			toast_relids = lappend_oid(toast_relids, entry->toast_relid);
+	}
+
+	foreach(lc, toast_relids)
+	{
+		Oid			toast_relid = lfirst_oid(lc);
+
+		entry = (GttStorageEntry *) hash_search(gtt_storage_hash,
+												&toast_relid,
+												HASH_FIND, NULL);
+		if (entry != NULL && entry->storage_created)
+			gtt_truncate_smgr(entry);
+	}
+	list_free(toast_relids);
+}
+
 /*
  * gtt_session_cleanup
  *		Drop all per-session GTT storage files at backend exit.
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index d4e83e5658c..5596d0f5573 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -397,6 +397,7 @@ typedef struct PartitionIndexExtDepEntry
 	((child_is_partition) ? DEPENDENCY_AUTO : DEPENDENCY_NORMAL)
 
 static void truncate_check_rel(Oid relid, Form_pg_class reltuple);
+static void CheckInternalGttReloption(List *options);
 static void truncate_check_perms(Oid relid, Form_pg_class reltuple);
 static void truncate_check_activity(Relation rel);
 static void RangeVarCallbackForTruncate(const RangeVar *relation,
@@ -787,6 +788,28 @@ static List *collectPartitionIndexExtDeps(List *partitionOids);
 static void applyPartitionIndexExtDeps(Oid newPartOid, List *extDepState);
 static void freePartitionIndexExtDeps(List *extDepState);
 
+/*
+ * CheckInternalGttReloption
+ *		Raise an error if 'options' names an internal GTT reloption.
+ *
+ * on_commit_delete is internal: it persists the ON COMMIT DELETE ROWS action
+ * of a global temporary table so other sessions can discover it.  Users may
+ * not set or reset it via CREATE TABLE ... WITH (...) or ALTER TABLE
+ * SET/RESET (...).  The match is on the option name alone.
+ */
+static void
+CheckInternalGttReloption(List *options)
+{
+	foreach_node(DefElem, def, options)
+	{
+		if (strcmp(def->defname, "on_commit_delete") == 0)
+			ereport(ERROR,
+					errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					errmsg("on_commit_delete is an internal reloption and cannot be set directly"),
+					errhint("Use ON COMMIT DELETE ROWS when creating a global temporary table."));
+	}
+}
+
 /* ----------------------------------------------------------------
  *		DefineRelation
  *				Creates a new relation.
@@ -979,6 +1002,22 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 	if (!OidIsValid(ownerId))
 		ownerId = GetUserId();
 
+	/* Reject direct use of internal GTT reloptions */
+	CheckInternalGttReloption(stmt->options);
+
+	/*
+	 * For global temporary tables with ON COMMIT DELETE ROWS, persist the
+	 * on-commit action as a reloption so that other sessions can discover it.
+	 */
+	if (stmt->oncommit == ONCOMMIT_DELETE_ROWS
+		&& stmt->relation->relpersistence == RELPERSISTENCE_GLOBAL_TEMP)
+	{
+		DefElem    *def = makeDefElem("on_commit_delete",
+									  (Node *) makeBoolean(true), -1);
+
+		stmt->options = lappend(stmt->options, def);
+	}
+
 	/*
 	 * Parse and validate reloptions, if any.
 	 */
@@ -16985,6 +17024,9 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
 	if (defList == NIL && operation != AT_ReplaceRelOptions)
 		return;					/* nothing to do */
 
+	/* Reject direct SET/RESET of internal GTT reloptions */
+	CheckInternalGttReloption(defList);
+
 	pgclass = table_open(RelationRelationId, RowExclusiveLock);
 
 	/* Fetch heap tuple */
diff --git a/src/include/catalog/storage_gtt.h b/src/include/catalog/storage_gtt.h
index cefa5b5dbca..9e8b8f1b713 100644
--- a/src/include/catalog/storage_gtt.h
+++ b/src/include/catalog/storage_gtt.h
@@ -19,5 +19,6 @@ extern void GttInitSessionStorage(Relation relation);
 extern void GttEnsureSessionStorage(Relation relation);
 extern bool GttHasSessionStorage(Oid relid);
 extern void GttScheduleDropSessionStorage(Oid relid);
+extern void PreCommit_gtt_on_commit(void);
 
 #endif							/* STORAGE_GTT_H */
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 696591b7d22..d711113313d 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -356,6 +356,7 @@ typedef struct StdRdOptions
 	 * to freeze. 0 if disabled, -1 if unspecified.
 	 */
 	double		vacuum_max_eager_freeze_failure_rate;
+	bool		on_commit_delete;	/* GTT: truncate data on commit */
 } StdRdOptions;
 
 #define HEAP_MIN_FILLFACTOR			10
-- 
2.43.0

