| From: | Mingwei Jia <i(at)nayishan(dot)top> |
|---|---|
| To: | pgsql-hackers(at)lists(dot)postgresql(dot)org |
| Subject: | [RFC PATCH v2 RESEND 04/10] umbra: add patch 3 metadata disk format and identity mapping bootstrap |
| Date: | 2026-06-01 23:33:34 |
| Message-ID: | 20260601233340.67949-3-i@nayishan.top |
| Views: | Whole Thread | Raw Message | Download mbox | Resend email |
| Thread: | |
| Lists: | pgsql-hackers |
---
src/backend/catalog/storage.c | 3 +
src/backend/storage/Makefile | 5 +
src/backend/storage/buffer/bufmgr.c | 2 +
src/backend/storage/map/Makefile | 19 ++
src/backend/storage/map/map.c | 162 +++++++++++++
src/backend/storage/map/mapsuper.c | 338 ++++++++++++++++++++++++++++
src/backend/storage/map/meson.build | 6 +
src/backend/storage/meson.build | 3 +
src/backend/storage/smgr/smgr.c | 47 ++++
src/backend/storage/smgr/umbra.c | 158 ++++++++++++-
src/include/storage/map.h | 53 +++++
src/include/storage/mapsuper.h | 100 ++++++++
src/include/storage/smgr.h | 6 +
src/include/storage/umbra.h | 7 +
14 files changed, 906 insertions(+), 3 deletions(-)
create mode 100644 src/backend/storage/map/Makefile
create mode 100644 src/backend/storage/map/map.c
create mode 100644 src/backend/storage/map/mapsuper.c
create mode 100644 src/backend/storage/map/meson.build
create mode 100644 src/include/storage/map.h
create mode 100644 src/include/storage/mapsuper.h
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index e443a4993c..6b69329a52 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -150,6 +150,8 @@ RelationCreateStorage(RelFileLocator rlocator, char relpersistence,
srel = smgropen(rlocator, procNumber);
smgrcreate(srel, MAIN_FORKNUM, false);
+ if (needs_wal)
+ smgrcreaterelationmetadata(srel);
if (needs_wal)
log_smgrcreate(&srel->smgr_rlocator.locator, MAIN_FORKNUM);
@@ -1014,6 +1016,7 @@ smgr_redo(XLogReaderState *record)
* log as best we can until the drop is seen.
*/
smgrcreate(reln, MAIN_FORKNUM, true);
+ smgrcreaterelationmetadata(reln);
/*
* Before we perform the truncation, update minimum recovery point to
diff --git a/src/backend/storage/Makefile b/src/backend/storage/Makefile
index 2afb42ca96..b07ba46dbb 100644
--- a/src/backend/storage/Makefile
+++ b/src/backend/storage/Makefile
@@ -20,4 +20,9 @@ SUBDIRS = \
smgr \
sync
+ifeq ($(with_umbra), yes)
+SUBDIRS += \
+ map
+endif
+
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 3cc0b0bdd9..540f346d53 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -5505,6 +5505,8 @@ CreateAndCopyRelationData(RelFileLocator src_rlocator,
permanent);
}
}
+
+ smgrcopyrelationmetadata(src_rel, dst_rel, relpersistence);
}
/* ---------------------------------------------------------------------
diff --git a/src/backend/storage/map/Makefile b/src/backend/storage/map/Makefile
new file mode 100644
index 0000000000..ee9603de14
--- /dev/null
+++ b/src/backend/storage/map/Makefile
@@ -0,0 +1,19 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for storage/map (Umbra mapping subsystem)
+#
+# IDENTIFICATION
+# src/backend/storage/map/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/storage/map
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = \
+ map.o \
+ mapsuper.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/map/map.c b/src/backend/storage/map/map.c
new file mode 100644
index 0000000000..563f38b21a
--- /dev/null
+++ b/src/backend/storage/map/map.c
@@ -0,0 +1,162 @@
+/*-------------------------------------------------------------------------
+ *
+ * map.c
+ * Umbra metadata-fork disk layout helpers.
+ *
+ * This file contains address-translation and in-page access routines for the
+ * metadata fork disk layout.
+ *
+ * src/backend/storage/map/map.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/map.h"
+#include "storage/um_defs.h"
+
+/*
+ * MapPageInit
+ *		Fill every byte of the page's pblknos[] array with 0xFF, so each
+ *		32-bit entry reads back as all-ones.
+ */
+void
+MapPageInit(MapPage *page)
+{
+	Assert(page != NULL);
+
+	memset(page->pblknos, 0xFF, sizeof(page->pblknos));
+}
+
+/*
+ * MapPageGetEntry
+ *		Return the block number stored in slot entry_idx of a map page.
+ *
+ * Raises ERROR when entry_idx lies outside [0, MAP_ENTRIES_PER_PAGE).
+ */
+BlockNumber
+MapPageGetEntry(const MapPage *page, int entry_idx)
+{
+	Assert(page != NULL);
+
+	/* The sign test runs first, so the unsigned comparison is safe. */
+	if (!(entry_idx >= 0 && entry_idx < MAP_ENTRIES_PER_PAGE))
+		elog(ERROR, "map entry index %d is out of range", entry_idx);
+
+	return page->pblknos[entry_idx];
+}
+
+/*
+ * MapPageSetEntry
+ *		Store pblkno in slot entry_idx of a map page.
+ *
+ * Raises ERROR when entry_idx lies outside [0, MAP_ENTRIES_PER_PAGE).
+ */
+void
+MapPageSetEntry(MapPage *page, int entry_idx, BlockNumber pblkno)
+{
+	Assert(page != NULL);
+
+	/* The sign test runs first, so the unsigned comparison is safe. */
+	if (!(entry_idx >= 0 && entry_idx < MAP_ENTRIES_PER_PAGE))
+		elog(ERROR, "map entry index %d is out of range", entry_idx);
+
+	page->pblknos[entry_idx] = pblkno;
+}
+
+/*
+ * MapForkPageIndexToMapBlkno
+ *		Translate the fork_page_idx'th map page of a fork into its block
+ *		number inside the metadata file.
+ *
+ * Within a group the pages are laid out as FSM, then VM, then MAIN.  FSM and
+ * VM use fork_page_idx directly as the group number; MAIN spreads its pages
+ * over groups of MAP_GROUP_MAIN_PAGES.
+ *
+ * Raises ERROR for the metadata fork itself, for any other unsupported fork,
+ * and when the result would exceed MaxBlockNumber.
+ */
+BlockNumber
+MapForkPageIndexToMapBlkno(ForkNumber forknum, BlockNumber fork_page_idx)
+{
+	uint64		page_idx = (uint64) fork_page_idx;
+	uint64		group_no;
+	uint64		in_group;
+	uint64		blkno64;
+
+	if (forknum == UMBRA_METADATA_FORKNUM)
+		elog(ERROR, "Umbra metadata fork cannot be addressed as a map target");
+
+	/* Work out which group the page lives in and its offset in the group. */
+	switch (forknum)
+	{
+		case FSM_FORKNUM:
+			group_no = page_idx;
+			in_group = 0;
+			break;
+
+		case VISIBILITYMAP_FORKNUM:
+			group_no = page_idx;
+			in_group = (uint64) MAP_GROUP_FSM_PAGES;
+			break;
+
+		case MAIN_FORKNUM:
+			group_no = page_idx / (uint64) MAP_GROUP_MAIN_PAGES;
+			in_group = (uint64) MAP_GROUP_FSM_PAGES +
+				(uint64) MAP_GROUP_VM_PAGES +
+				(page_idx % (uint64) MAP_GROUP_MAIN_PAGES);
+			break;
+
+		default:
+			elog(ERROR, "unsupported fork number %d in map layout", (int) forknum);
+			pg_unreachable();
+	}
+
+	/* All arithmetic is 64-bit so the range check below is meaningful. */
+	blkno64 = (uint64) MAP_BLOCK_FIRST_GROUP +
+		group_no * (uint64) MAP_GROUP_TOTAL_PAGES +
+		in_group;
+
+	if (blkno64 > (uint64) MaxBlockNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("cannot address map page %u for fork %d",
+						fork_page_idx, forknum)));
+
+	return (BlockNumber) blkno64;
+}
+
+/*
+ * MapLblknoToMapBlkno
+ *		Compute the map address of logical block lblkno of a fork.
+ *
+ * The value returned is
+ *		MapForkPageIndexToMapBlkno(forknum, lblkno / MAP_ENTRIES_PER_PAGE)
+ *			* MAP_ENTRIES_PER_PAGE + lblkno % MAP_ENTRIES_PER_PAGE
+ * that is, a flat entry index over the whole metadata file rather than a
+ * plain metadata block number.  Raises ERROR if that index exceeds
+ * MaxBlockNumber (or if MapForkPageIndexToMapBlkno rejects the fork).
+ *
+ * NOTE(review): the function name suggests the result is the metadata block
+ * holding the entry; confirm that callers really expect the flat entry index.
+ */
+BlockNumber
+MapLblknoToMapBlkno(ForkNumber forknum, BlockNumber lblkno)
+{
+ BlockNumber fork_page_idx;
+ uint64 entry64;
+
+ /* Which of the fork's map pages holds the entry for lblkno. */
+ fork_page_idx = lblkno / MAP_ENTRIES_PER_PAGE;
+ entry64 = (uint64) MapForkPageIndexToMapBlkno(forknum, fork_page_idx) *
+ (uint64) MAP_ENTRIES_PER_PAGE +
+ (uint64) (lblkno % MAP_ENTRIES_PER_PAGE);
+
+ if (entry64 > (uint64) MaxBlockNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("cannot address logical block %u for fork %d in map",
+ lblkno, forknum)));
+
+ return (BlockNumber) entry64;
+}
+
+/*
+ * MapDecodeMapBlkno
+ *		Inverse of MapForkPageIndexToMapBlkno: classify a metadata-file block.
+ *
+ * On success returns true and stores the owning fork in *forknum and the
+ * fork-relative map page index in *fork_page_idx.  Returns false, leaving
+ * both outputs untouched, for the superblock and for any block that does not
+ * fall into a group slot.
+ */
+bool
+MapDecodeMapBlkno(BlockNumber map_blkno, ForkNumber *forknum,
+				  BlockNumber *fork_page_idx)
+{
+	const uint64 fsm_end = (uint64) MAP_GROUP_FSM_PAGES;
+	const uint64 vm_end = fsm_end + (uint64) MAP_GROUP_VM_PAGES;
+	const uint64 main_end = vm_end + (uint64) MAP_GROUP_MAIN_PAGES;
+	uint64		rel;
+	uint64		group_no;
+	uint64		slot;
+
+	Assert(forknum != NULL);
+	Assert(fork_page_idx != NULL);
+
+	if (map_blkno == MAP_BLOCK_SUPER || map_blkno < MAP_BLOCK_FIRST_GROUP)
+		return false;
+
+	/* Position relative to the first group, split into group and slot. */
+	rel = (uint64) (map_blkno - MAP_BLOCK_FIRST_GROUP);
+	group_no = rel / (uint64) MAP_GROUP_TOTAL_PAGES;
+	slot = rel % (uint64) MAP_GROUP_TOTAL_PAGES;
+
+	if (slot < fsm_end)
+	{
+		*forknum = FSM_FORKNUM;
+		*fork_page_idx = (BlockNumber) group_no;
+	}
+	else if (slot < vm_end)
+	{
+		*forknum = VISIBILITYMAP_FORKNUM;
+		*fork_page_idx = (BlockNumber) group_no;
+	}
+	else if (slot < main_end)
+	{
+		*forknum = MAIN_FORKNUM;
+		*fork_page_idx = (BlockNumber)
+			(group_no * (uint64) MAP_GROUP_MAIN_PAGES + (slot - vm_end));
+	}
+	else
+		return false;
+
+	return true;
+}
diff --git a/src/backend/storage/map/mapsuper.c b/src/backend/storage/map/mapsuper.c
new file mode 100644
index 0000000000..b376d513fd
--- /dev/null
+++ b/src/backend/storage/map/mapsuper.c
@@ -0,0 +1,338 @@
+/*-------------------------------------------------------------------------
+ *
+ * mapsuper.c
+ * Umbra metadata superblock helpers.
+ *
+ * This file contains on-disk superblock encoding and direct metadata-file I/O
+ * helpers.
+ *
+ * src/backend/storage/map/mapsuper.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/map.h"
+#include "storage/mapsuper.h"
+#include "storage/umbra.h"
+
+static void MapSBlockReportCorrupt(SMgrRelation reln, const char *reason);
+
+/*
+ * MapSuperblockRefreshCRC
+ *		Recompute the CRC-32C of the payload and store it in data.crc.
+ */
+void
+MapSuperblockRefreshCRC(MapSuperblock *super)
+{
+	pg_crc32c	fresh;
+
+	Assert(super != NULL);
+
+	/* The checksum covers every payload byte that precedes the crc field. */
+	INIT_CRC32C(fresh);
+	COMP_CRC32C(fresh, &super->data, offsetof(MapSuperblockData, crc));
+	FIN_CRC32C(fresh);
+
+	super->data.crc = fresh;
+}
+
+/*
+ * MapSuperblockCheckCRC
+ *		Return true if the stored data.crc matches a CRC-32C computed over
+ *		the payload bytes that precede the crc field.
+ */
+bool
+MapSuperblockCheckCRC(const MapSuperblock *super)
+{
+	pg_crc32c	expected;
+
+	Assert(super != NULL);
+
+	INIT_CRC32C(expected);
+	COMP_CRC32C(expected, &super->data, offsetof(MapSuperblockData, crc));
+	FIN_CRC32C(expected);
+
+	return EQ_CRC32C(expected, super->data.crc);
+}
+
+/*
+ * MapSuperblockInit
+ *		Build a fresh in-memory superblock carrying the given flags.
+ *
+ * The whole 512-byte union is zeroed first, so the MAIN counters start at 0.
+ * The FSM and VM counters are set to InvalidBlockNumber.  The CRC is left at
+ * zero; MapSuperblockRefreshCRC() fills it in.
+ */
+void
+MapSuperblockInit(MapSuperblock *super, uint32 flags)
+{
+	MapSuperblockData *d;
+
+	Assert(super != NULL);
+
+	memset(super, 0, sizeof(*super));
+	d = &super->data;
+
+	/* Identity fields checked by MapSuperblockHasValidIdentity(). */
+	d->magic = MAP_SUPERBLOCK_MAGIC;
+	d->version = MAP_SUPERBLOCK_VERSION;
+	d->blcksz = BLCKSZ;
+	d->flags = flags;
+
+	/* Auxiliary forks start out with no valid counters. */
+	d->next_free_phys_block_fsm = InvalidBlockNumber;
+	d->phys_capacity_fsm = InvalidBlockNumber;
+	d->logical_nblocks_fsm = InvalidBlockNumber;
+	d->next_free_phys_block_vm = InvalidBlockNumber;
+	d->phys_capacity_vm = InvalidBlockNumber;
+	d->logical_nblocks_vm = InvalidBlockNumber;
+
+	d->last_updated_lsn = InvalidXLogRecPtr;
+	d->crc = 0;
+}
+
+/*
+ * MapSuperblockHasValidIdentity
+ *		Return true if magic, version and block size all match what this
+ *		build expects.  The CRC is not examined here.
+ */
+bool
+MapSuperblockHasValidIdentity(const MapSuperblock *super)
+{
+	Assert(super != NULL);
+
+	return super->data.magic == MAP_SUPERBLOCK_MAGIC &&
+		super->data.version == MAP_SUPERBLOCK_VERSION &&
+		super->data.blcksz == BLCKSZ;
+}
+
+/*
+ * MapSuperblockIsValid
+ *		Return true if the superblock passes both the identity check and the
+ *		CRC check.  The CRC is only evaluated when the identity is valid.
+ */
+bool
+MapSuperblockIsValid(const MapSuperblock *super)
+{
+	Assert(super != NULL);
+
+	return MapSuperblockHasValidIdentity(super) &&
+		MapSuperblockCheckCRC(super);
+}
+
+/*
+ * MapSuperblockSetFlags
+ *		Overwrite the flags word of an in-memory superblock.  The CRC is not
+ *		refreshed here.
+ */
+void
+MapSuperblockSetFlags(MapSuperblock *super, uint32 flags)
+{
+ Assert(super != NULL);
+
+ super->data.flags = flags;
+}
+
+/*
+ * MapSuperblockGetFlags
+ *		Return the flags word of an in-memory superblock.
+ */
+uint32
+MapSuperblockGetFlags(const MapSuperblock *super)
+{
+ Assert(super != NULL);
+
+ return super->data.flags;
+}
+
+/*
+ * MapSuperblockSetLastUpdatedLSN
+ *		Overwrite last_updated_lsn of an in-memory superblock.  The CRC is
+ *		not refreshed here.
+ */
+void
+MapSuperblockSetLastUpdatedLSN(MapSuperblock *super, XLogRecPtr lsn)
+{
+ Assert(super != NULL);
+
+ super->data.last_updated_lsn = lsn;
+}
+
+/*
+ * MapSuperblockGetLastUpdatedLSN
+ *		Return last_updated_lsn of an in-memory superblock.
+ */
+XLogRecPtr
+MapSuperblockGetLastUpdatedLSN(const MapSuperblock *super)
+{
+ Assert(super != NULL);
+
+ return super->data.last_updated_lsn;
+}
+
+/*
+ * MapSuperblockGetNextFreePhysBlock
+ *		Return the next_free_phys_block counter for MAIN, FSM or VM.
+ *
+ * Raises ERROR for any other fork.
+ */
+BlockNumber
+MapSuperblockGetNextFreePhysBlock(const MapSuperblock *super, ForkNumber forknum)
+{
+	BlockNumber result = InvalidBlockNumber;
+
+	Assert(super != NULL);
+
+	if (forknum == MAIN_FORKNUM)
+		result = super->data.next_free_phys_block_main;
+	else if (forknum == FSM_FORKNUM)
+		result = super->data.next_free_phys_block_fsm;
+	else if (forknum == VISIBILITYMAP_FORKNUM)
+		result = super->data.next_free_phys_block_vm;
+	else
+		elog(ERROR, "unsupported fork number for superblock: %d", forknum);
+
+	return result;
+}
+
+/*
+ * MapSuperblockSetNextFreePhysBlock
+ *		Store blkno as the next_free_phys_block counter for MAIN, FSM or VM.
+ *
+ * Raises ERROR for any other fork.  The CRC is not refreshed here.
+ */
+void
+MapSuperblockSetNextFreePhysBlock(MapSuperblock *super, ForkNumber forknum,
+								  BlockNumber blkno)
+{
+	Assert(super != NULL);
+
+	if (forknum == MAIN_FORKNUM)
+		super->data.next_free_phys_block_main = blkno;
+	else if (forknum == FSM_FORKNUM)
+		super->data.next_free_phys_block_fsm = blkno;
+	else if (forknum == VISIBILITYMAP_FORKNUM)
+		super->data.next_free_phys_block_vm = blkno;
+	else
+		elog(ERROR, "unsupported fork number for superblock: %d", forknum);
+}
+
+/*
+ * MapSuperblockGetPhysCapacity
+ *		Return the phys_capacity counter for MAIN, FSM or VM.
+ *
+ * Raises ERROR for any other fork.
+ */
+BlockNumber
+MapSuperblockGetPhysCapacity(const MapSuperblock *super, ForkNumber forknum)
+{
+	BlockNumber result = InvalidBlockNumber;
+
+	Assert(super != NULL);
+
+	if (forknum == MAIN_FORKNUM)
+		result = super->data.phys_capacity_main;
+	else if (forknum == FSM_FORKNUM)
+		result = super->data.phys_capacity_fsm;
+	else if (forknum == VISIBILITYMAP_FORKNUM)
+		result = super->data.phys_capacity_vm;
+	else
+		elog(ERROR, "unsupported fork number for superblock: %d", forknum);
+
+	return result;
+}
+
+/*
+ * MapSuperblockSetPhysCapacity
+ *		Store blkno as the phys_capacity counter for MAIN, FSM or VM.
+ *
+ * Raises ERROR for any other fork.  The CRC is not refreshed here.
+ */
+void
+MapSuperblockSetPhysCapacity(MapSuperblock *super, ForkNumber forknum,
+							 BlockNumber blkno)
+{
+	Assert(super != NULL);
+
+	if (forknum == MAIN_FORKNUM)
+		super->data.phys_capacity_main = blkno;
+	else if (forknum == FSM_FORKNUM)
+		super->data.phys_capacity_fsm = blkno;
+	else if (forknum == VISIBILITYMAP_FORKNUM)
+		super->data.phys_capacity_vm = blkno;
+	else
+		elog(ERROR, "unsupported fork number for superblock: %d", forknum);
+}
+
+/*
+ * MapSuperblockGetLogicalNblocks
+ *		Return the logical_nblocks counter for MAIN, FSM or VM.
+ *
+ * Raises ERROR for any other fork.
+ */
+BlockNumber
+MapSuperblockGetLogicalNblocks(const MapSuperblock *super, ForkNumber forknum)
+{
+	BlockNumber result = InvalidBlockNumber;
+
+	Assert(super != NULL);
+
+	if (forknum == MAIN_FORKNUM)
+		result = super->data.logical_nblocks_main;
+	else if (forknum == FSM_FORKNUM)
+		result = super->data.logical_nblocks_fsm;
+	else if (forknum == VISIBILITYMAP_FORKNUM)
+		result = super->data.logical_nblocks_vm;
+	else
+		elog(ERROR, "unsupported fork number for superblock: %d", forknum);
+
+	return result;
+}
+
+/*
+ * MapSuperblockSetLogicalNblocks
+ *		Store nblocks as the logical_nblocks counter for MAIN, FSM or VM.
+ *
+ * Raises ERROR for any other fork.  The CRC is not refreshed here.
+ */
+void
+MapSuperblockSetLogicalNblocks(MapSuperblock *super, ForkNumber forknum,
+							   BlockNumber nblocks)
+{
+	Assert(super != NULL);
+
+	if (forknum == MAIN_FORKNUM)
+		super->data.logical_nblocks_main = nblocks;
+	else if (forknum == FSM_FORKNUM)
+		super->data.logical_nblocks_fsm = nblocks;
+	else if (forknum == VISIBILITYMAP_FORKNUM)
+		super->data.logical_nblocks_vm = nblocks;
+	else
+		elog(ERROR, "unsupported fork number for superblock: %d", forknum);
+}
+
+/*
+ * MapSuperblockPackPage
+ *		Serialize a superblock into a BLCKSZ page image: the first
+ *		MAP_SUPERBLOCK_SIZE bytes are the superblock, the rest is zero.
+ */
+void
+MapSuperblockPackPage(const MapSuperblock *super, char page[BLCKSZ])
+{
+	Assert(super != NULL);
+	Assert(page != NULL);
+
+	memcpy(page, super->padding, MAP_SUPERBLOCK_SIZE);
+	memset(page + MAP_SUPERBLOCK_SIZE, 0, BLCKSZ - MAP_SUPERBLOCK_SIZE);
+}
+
+/*
+ * MapSuperblockUnpackPage
+ *		Copy the first MAP_SUPERBLOCK_SIZE bytes of a page image into an
+ *		in-memory superblock.  No validation is performed here.
+ */
+void
+MapSuperblockUnpackPage(MapSuperblock *super, const char page[BLCKSZ])
+{
+ Assert(super != NULL);
+ Assert(page != NULL);
+
+ memcpy(super->padding, page, MAP_SUPERBLOCK_SIZE);
+}
+
+/*
+ * MapSBlockRead
+ *		Load and validate the superblock of a relation's metadata file.
+ *
+ * Returns false, without touching *super, when the metadata file does not
+ * exist or is empty.  Otherwise block MAP_BLOCK_SUPER is read into *super and
+ * true is returned.  Raises ERROR (ERRCODE_DATA_CORRUPTED) when the identity
+ * fields or the CRC do not check out.
+ */
+bool
+MapSBlockRead(SMgrRelation reln, MapSuperblock *super)
+{
+	/*
+	 * Use an I/O-aligned buffer, matching what umcopyrelationmetadata() hands
+	 * to UmMetadataRead(); a bare char array carries no alignment guarantee.
+	 */
+	PGIOAlignedBlock page;
+
+	Assert(reln != NULL);
+	Assert(super != NULL);
+
+	if (!UmMetadataExists(reln))
+		return false;
+
+	if (UmMetadataNblocks(reln) == 0)
+		return false;
+
+	UmMetadataRead(reln, MAP_BLOCK_SUPER, page.data);
+	MapSuperblockUnpackPage(super, page.data);
+
+	/* Report the more specific identity failure before the CRC failure. */
+	if (!MapSuperblockHasValidIdentity(super))
+		MapSBlockReportCorrupt(reln, "invalid identity");
+	if (!MapSuperblockCheckCRC(super))
+		MapSBlockReportCorrupt(reln, "CRC mismatch");
+
+	return true;
+}
+
+/*
+ * MapSBlockWrite
+ *		Write a superblock to block MAP_BLOCK_SUPER of the metadata file.
+ *
+ * The caller's superblock is not modified: the CRC is refreshed on a private
+ * copy before it is packed into a page image.  The metadata file is opened
+ * (or created) first; an empty file is extended, otherwise block 0 is
+ * overwritten.  Raises ERROR if the metadata file cannot be opened.
+ */
+void
+MapSBlockWrite(SMgrRelation reln, const MapSuperblock *super, bool skipFsync)
+{
+	MapSuperblock write_super;
+
+	/*
+	 * Use an I/O-aligned buffer, matching what umcopyrelationmetadata() hands
+	 * to UmMetadataWrite()/UmMetadataExtend(); a bare char array carries no
+	 * alignment guarantee.
+	 */
+	PGIOAlignedBlock page;
+
+	Assert(reln != NULL);
+	Assert(super != NULL);
+
+	write_super = *super;
+	MapSuperblockRefreshCRC(&write_super);
+	MapSuperblockPackPage(&write_super, page.data);
+
+	if (!UmMetadataOpenOrCreate(reln, false, NULL))
+		elog(ERROR, "could not open Umbra metadata file for superblock write");
+
+	if (UmMetadataNblocks(reln) == 0)
+		UmMetadataExtend(reln, MAP_BLOCK_SUPER, page.data, skipFsync);
+	else
+		UmMetadataWrite(reln, MAP_BLOCK_SUPER, page.data, skipFsync);
+}
+
+/*
+ * MapSBlockInitNew
+ *		Build a fresh superblock with the given flags and LSN and write it to
+ *		the relation's metadata file via MapSBlockWrite().
+ */
+void
+MapSBlockInitNew(SMgrRelation reln, uint32 flags, XLogRecPtr lsn, bool skipFsync)
+{
+ MapSuperblock super;
+
+ MapSuperblockInit(&super, flags);
+ MapSuperblockSetLastUpdatedLSN(&super, lsn);
+ MapSBlockWrite(reln, &super, skipFsync);
+}
+
+/*
+ * MapSBlockReportCorrupt
+ *		Raise ERRCODE_DATA_CORRUPTED for a relation's superblock, naming the
+ *		relation as spcOid/dbOid/relNumber and appending the given reason.
+ */
+static void
+MapSBlockReportCorrupt(SMgrRelation reln, const char *reason)
+{
+	const RelFileLocator *loc = &reln->smgr_rlocator.locator;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_DATA_CORRUPTED),
+			 errmsg("Umbra metadata superblock is corrupted for relation %u/%u/%u: %s",
+					loc->spcOid, loc->dbOid, loc->relNumber, reason)));
+}
diff --git a/src/backend/storage/map/meson.build b/src/backend/storage/map/meson.build
new file mode 100644
index 0000000000..0f780fe522
--- /dev/null
+++ b/src/backend/storage/map/meson.build
@@ -0,0 +1,6 @@
+# Copyright (c) 2022-2025, PostgreSQL Global Development Group
+
+backend_sources += files(
+ 'map.c',
+ 'mapsuper.c',
+)
diff --git a/src/backend/storage/meson.build b/src/backend/storage/meson.build
index 05637aa3a4..2f80f3f575 100644
--- a/src/backend/storage/meson.build
+++ b/src/backend/storage/meson.build
@@ -7,6 +7,9 @@ subdir('freespace')
subdir('ipc')
subdir('large_object')
subdir('lmgr')
+if get_option('umbra').enabled()
+ subdir('map')
+endif
subdir('page')
subdir('smgr')
subdir('sync')
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index a7b70d856c..c9a3ef6461 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -127,6 +127,13 @@ typedef struct f_smgr
BlockNumber old_blocks, BlockNumber nblocks);
void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum);
+ void (*smgr_create_relation_metadata) (SMgrRelation reln);
+ void (*smgr_copy_relation_metadata) (SMgrRelation src,
+ SMgrRelation dst,
+ char relpersistence);
+ void (*smgr_sync_relation_metadata) (SMgrRelation reln);
+ void (*smgr_unlink_relation_metadata) (RelFileLocatorBackend rlocator,
+ bool isRedo);
int (*smgr_fd) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off);
} f_smgr;
@@ -161,6 +168,10 @@ static const f_smgr smgrsw[] = {
.smgr_truncate = mdtruncate,
.smgr_immedsync = mdimmedsync,
.smgr_registersync = mdregistersync,
+ .smgr_create_relation_metadata = NULL,
+ .smgr_copy_relation_metadata = NULL,
+ .smgr_sync_relation_metadata = NULL,
+ .smgr_unlink_relation_metadata = NULL,
.smgr_fd = mdfd,
},
#ifdef USE_UMBRA
@@ -186,6 +197,10 @@ static const f_smgr smgrsw[] = {
.smgr_truncate = umtruncate,
.smgr_immedsync = umimmedsync,
.smgr_registersync = umregistersync,
+ .smgr_create_relation_metadata = umcreaterelationmetadata,
+ .smgr_copy_relation_metadata = umcopyrelationmetadata,
+ .smgr_sync_relation_metadata = umsyncrelationmetadata,
+ .smgr_unlink_relation_metadata = umunlinkrelationmetadata,
.smgr_fd = umfd,
},
#endif
@@ -529,6 +544,34 @@ smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
RESUME_INTERRUPTS();
}
+/*
+ * smgrcreaterelationmetadata() -- Create relation-level metadata, if the
+ *		relation's storage manager implements that callback.
+ *
+ * A NULL callback makes this a no-op.
+ */
+void
+smgrcreaterelationmetadata(SMgrRelation reln)
+{
+	const f_smgr *impl = &smgrsw[reln->smgr_which];
+
+	if (impl->smgr_create_relation_metadata == NULL)
+		return;
+
+	impl->smgr_create_relation_metadata(reln);
+}
+
+/*
+ * smgrcopyrelationmetadata() -- Copy relation-level metadata from src to dst.
+ *
+ * Dispatch is on the destination's storage manager; a NULL callback there
+ * makes this a no-op.
+ */
+void
+smgrcopyrelationmetadata(SMgrRelation src, SMgrRelation dst, char relpersistence)
+{
+	const f_smgr *impl = &smgrsw[dst->smgr_which];
+
+	if (impl->smgr_copy_relation_metadata == NULL)
+		return;
+
+	impl->smgr_copy_relation_metadata(src, dst, relpersistence);
+}
+
+/*
+ * smgrsyncrelationmetadata() -- Sync relation-level metadata, if the
+ *		relation's storage manager implements that callback.
+ *
+ * A NULL callback makes this a no-op.
+ */
+void
+smgrsyncrelationmetadata(SMgrRelation reln)
+{
+	const f_smgr *impl = &smgrsw[reln->smgr_which];
+
+	if (impl->smgr_sync_relation_metadata == NULL)
+		return;
+
+	impl->smgr_sync_relation_metadata(reln);
+}
+
+/*
+ * smgrunlinkrelationmetadata() -- Remove relation-level metadata.
+ *
+ * Only a RelFileLocatorBackend is available here, so there is no smgr_which
+ * to dispatch on.  Slot 0 of smgrsw[] is md, whose metadata callbacks are
+ * NULL, so consulting only that slot would never reach an implementation
+ * that actually owns metadata.  Instead, give every storage manager that
+ * provides the callback a chance to clean up.
+ *
+ * NOTE(review): this assumes an implementation's unlink callback tolerates
+ * relations that have no metadata file -- confirm for UmMetadataUnlink().
+ */
+void
+smgrunlinkrelationmetadata(RelFileLocatorBackend rlocator, bool isRedo)
+{
+	for (int i = 0; i < (int) lengthof(smgrsw); i++)
+	{
+		if (smgrsw[i].smgr_unlink_relation_metadata)
+			smgrsw[i].smgr_unlink_relation_metadata(rlocator, isRedo);
+	}
+}
/*
* smgrdosyncall() -- Immediately sync all forks of all given relations
*
@@ -563,6 +606,8 @@ smgrdosyncall(SMgrRelation *rels, int nrels)
if (smgrsw[which].smgr_exists(rels[i], forknum))
smgrsw[which].smgr_immedsync(rels[i], forknum);
}
+
+ smgrsyncrelationmetadata(rels[i]);
}
RESUME_INTERRUPTS();
@@ -643,6 +688,8 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo);
+
+ smgrunlinkrelationmetadata(rlocators[i], isRedo);
}
pfree(rlocators);
diff --git a/src/backend/storage/smgr/umbra.c b/src/backend/storage/smgr/umbra.c
index 2c08231587..fc6e480276 100644
--- a/src/backend/storage/smgr/umbra.c
+++ b/src/backend/storage/smgr/umbra.c
@@ -4,8 +4,9 @@
* Umbra storage manager skeleton.
*
* This file establishes Umbra as a separate smgr implementation from md.c.
- * Data-fork operations remain md-backed here, while relation-local metadata
- * file operations go through umfile.
+ * It maintains identity mapping state (logical block number == physical
+ * block number) in the relation-local metadata file, using md.c for
+ * data-fork I/O and umfile for metadata-file I/O.
*
* src/backend/storage/smgr/umbra.c
*
@@ -13,7 +14,9 @@
*/
#include "postgres.h"
+#include "catalog/pg_class.h"
#include "storage/md.h"
+#include "storage/mapsuper.h"
#include "storage/smgr.h"
#include "storage/umfile.h"
#include "storage/umbra.h"
@@ -24,7 +27,11 @@ typedef struct UmbraSmgrRelationState
UmbraFileContext *filectx;
} UmbraSmgrRelationState;
+static bool um_tracks_identity_metadata(ForkNumber forknum);
static UmbraFileContext *um_relation_filectx(SMgrRelation reln);
+static void um_identity_update_metadata(SMgrRelation reln, ForkNumber forknum,
+ BlockNumber nblocks, bool fork_exists,
+ bool skipFsync);
bool
UmMetadataExists(SMgrRelation reln)
@@ -124,7 +131,7 @@ umdestroy(SMgrRelation reln)
{
UmbraSmgrRelationState *state = reln->smgr_private;
- umfile_ctx_forget(reln->smgr_rlocator);
+ umfile_ctx_release(reln->smgr_rlocator);
if (state != NULL)
{
@@ -133,15 +140,94 @@ umdestroy(SMgrRelation reln)
}
}
+/*
+ * umisinternalfork
+ *		Return true if forknum is Umbra's own metadata fork
+ *		(UMBRA_METADATA_FORKNUM).
+ */
+bool
+umisinternalfork(ForkNumber forknum)
+{
+ return forknum == UMBRA_METADATA_FORKNUM;
+}
+
+/*
+ * umcreaterelationmetadata
+ *		Make sure the relation's metadata file exists, creating it if needed.
+ *
+ * Raises ERROR if the file can be neither opened nor created.  Whether the
+ * file was newly created is of no interest here, so NULL is passed for the
+ * "created" output, as MapSBlockWrite() does.
+ */
+void
+umcreaterelationmetadata(SMgrRelation reln)
+{
+	if (!UmMetadataOpenOrCreate(reln, false, NULL))
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not create Umbra metadata fork for relation %u/%u/%u",
+						reln->smgr_rlocator.locator.spcOid,
+						reln->smgr_rlocator.locator.dbOid,
+						reln->smgr_rlocator.locator.relNumber)));
+}
+
+/*
+ * umcopyrelationmetadata
+ *		Copy the metadata file of src block-by-block into dst.
+ *
+ * Nothing is done unless relpersistence is RELPERSISTENCE_PERMANENT and src
+ * has a metadata file.  Blocks that already exist in dst are overwritten;
+ * the rest are appended.  All writes skip fsync and a single immediate sync
+ * of dst is issued at the end.
+ *
+ * Raises ERROR if dst's metadata file can be neither opened nor created.
+ *
+ * NOTE(review): if dst already has more metadata blocks than src, the extra
+ * dst blocks are left in place -- confirm that cannot happen or is harmless.
+ */
+void
+umcopyrelationmetadata(SMgrRelation src, SMgrRelation dst, char relpersistence)
+{
+	BlockNumber src_nblocks;
+	BlockNumber dst_nblocks;
+	PGIOAlignedBlock pagebuf;
+
+	if (relpersistence != RELPERSISTENCE_PERMANENT)
+		return;
+
+	if (!UmMetadataExists(src))
+		return;
+
+	/* The "created" output is not needed, so pass NULL. */
+	if (!UmMetadataOpenOrCreate(dst, false, NULL))
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not create Umbra metadata fork for relation %u/%u/%u",
+						dst->smgr_rlocator.locator.spcOid,
+						dst->smgr_rlocator.locator.dbOid,
+						dst->smgr_rlocator.locator.relNumber)));
+
+	src_nblocks = UmMetadataNblocks(src);
+	dst_nblocks = UmMetadataNblocks(dst);
+
+	for (BlockNumber blkno = 0; blkno < src_nblocks; blkno++)
+	{
+		UmMetadataRead(src, blkno, pagebuf.data);
+
+		/* Overwrite blocks dst already has; append the remainder. */
+		if (blkno < dst_nblocks)
+			UmMetadataWrite(dst, blkno, pagebuf.data, true);
+		else
+			UmMetadataExtend(dst, blkno, pagebuf.data, true);
+	}
+
+	UmMetadataImmediateSync(dst);
+}
+
+/*
+ * umsyncrelationmetadata
+ *		Immediately sync the relation's metadata file, if it has one.
+ */
+void
+umsyncrelationmetadata(SMgrRelation reln)
+{
+	if (UmMetadataExists(reln))
+		UmMetadataImmediateSync(reln);
+}
+
+/*
+ * umunlinkrelationmetadata
+ *		Drop the umfile context for rlocator, then unlink its metadata file.
+ */
+void
+umunlinkrelationmetadata(RelFileLocatorBackend rlocator, bool isRedo)
+{
+ umfile_ctx_forget(rlocator);
+ UmMetadataUnlink(rlocator, isRedo);
+}
+
void
umcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
{
mdcreate(reln, forknum, isRedo);
+
+ if (um_tracks_identity_metadata(forknum))
+ um_identity_update_metadata(reln, forknum, 0, true, true);
}
bool
umexists(SMgrRelation reln, ForkNumber forknum)
{
+ if (forknum == UMBRA_METADATA_FORKNUM)
+ return UmMetadataExists(reln);
+
return mdexists(reln, forknum);
}
@@ -167,13 +253,30 @@ umextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
const void *buffer, bool skipFsync)
{
mdextend(reln, forknum, blocknum, buffer, skipFsync);
+
+ if (um_tracks_identity_metadata(forknum))
+ um_identity_update_metadata(reln, forknum, blocknum + 1, true,
+ skipFsync);
}
void
umzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
int nblocks, bool skipFsync)
{
+ BlockNumber target_nblocks;
+
mdzeroextend(reln, forknum, blocknum, nblocks, skipFsync);
+
+ if (um_tracks_identity_metadata(forknum))
+ {
+ target_nblocks = blocknum + (BlockNumber) nblocks;
+ if (target_nblocks < blocknum)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("Umbra identity mapping block count overflow")));
+ um_identity_update_metadata(reln, forknum, target_nblocks, true,
+ skipFsync);
+ }
}
bool
@@ -220,6 +323,11 @@ umwriteback(SMgrRelation reln, ForkNumber forknum,
BlockNumber
umnblocks(SMgrRelation reln, ForkNumber forknum)
{
+ /*
+ * Keep md.c responsible for the physical fork size query. mdtruncate()
+ * relies on a preceding mdnblocks() call to have opened all active
+ * segments.
+ */
return mdnblocks(reln, forknum);
}
@@ -228,12 +336,18 @@ umtruncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber old_blocks, BlockNumber nblocks)
{
mdtruncate(reln, forknum, old_blocks, nblocks);
+
+ if (um_tracks_identity_metadata(forknum))
+ um_identity_update_metadata(reln, forknum, nblocks, true, false);
}
void
umimmedsync(SMgrRelation reln, ForkNumber forknum)
{
mdimmedsync(reln, forknum);
+
+ if (um_tracks_identity_metadata(forknum) && UmMetadataExists(reln))
+ UmMetadataImmediateSync(reln);
}
void
@@ -261,3 +375,41 @@ um_relation_filectx(SMgrRelation reln)
return state->filectx;
}
+
+/*
+ * um_tracks_identity_metadata
+ *		Return true for the forks whose block counts are mirrored into the
+ *		metadata superblock: MAIN, FSM and VM.
+ */
+static bool
+um_tracks_identity_metadata(ForkNumber forknum)
+{
+	switch (forknum)
+	{
+		case MAIN_FORKNUM:
+		case FSM_FORKNUM:
+		case VISIBILITYMAP_FORKNUM:
+			return true;
+		default:
+			return false;
+	}
+}
+
+/*
+ * um_identity_update_metadata
+ *		Read-modify-write the superblock so the three counters of one fork
+ *		(logical nblocks, next free physical block, physical capacity) all
+ *		equal the same value, as identity mapping requires.
+ *
+ * If no superblock can be read (missing or empty metadata file) a fresh one
+ * with flags 0 is used as the starting point.  The stored value is nblocks,
+ * except that a non-MAIN fork reported as not existing gets
+ * InvalidBlockNumber.  last_updated_lsn is reset to InvalidXLogRecPtr on
+ * every update.
+ */
+static void
+um_identity_update_metadata(SMgrRelation reln, ForkNumber forknum,
+							BlockNumber nblocks, bool fork_exists,
+							bool skipFsync)
+{
+	MapSuperblock super;
+	BlockNumber value = nblocks;
+
+	Assert(reln != NULL);
+	Assert(um_tracks_identity_metadata(forknum));
+
+	if (!MapSBlockRead(reln, &super))
+		MapSuperblockInit(&super, 0);
+
+	/* MAIN always keeps real counters; auxiliary forks may be absent. */
+	if (!fork_exists && forknum != MAIN_FORKNUM)
+		value = InvalidBlockNumber;
+
+	MapSuperblockSetLogicalNblocks(&super, forknum, value);
+	MapSuperblockSetNextFreePhysBlock(&super, forknum, value);
+	MapSuperblockSetPhysCapacity(&super, forknum, value);
+
+	MapSuperblockSetLastUpdatedLSN(&super, InvalidXLogRecPtr);
+	MapSBlockWrite(reln, &super, skipFsync);
+}
diff --git a/src/include/storage/map.h b/src/include/storage/map.h
new file mode 100644
index 0000000000..b0887794c3
--- /dev/null
+++ b/src/include/storage/map.h
@@ -0,0 +1,53 @@
+/*-------------------------------------------------------------------------
+ *
+ * map.h
+ * Umbra metadata-fork disk layout helpers.
+ *
+ * This header defines the stable on-disk page layout and address translation
+ * helpers for Umbra's relation-local metadata file.
+ *
+ * src/include/storage/map.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef MAP_H
+#define MAP_H
+
+#include "storage/block.h"
+#include "storage/relfilelocator.h"
+
+#define MAP_ENTRIES_PER_PAGE (BLCKSZ / sizeof(uint32))
+
+/*
+ * Umbra metadata file page layout:
+ * - block 0: superblock payload
+ * - blocks 1..: repeated proportional groups
+ *
+ * Each group reserves one FSM map page, one VM map page, and 8192 MAIN map
+ * pages. That keeps the mapping formula stable while leaving room for the
+ * auxiliary forks to grow alongside MAIN.
+ */
+#define MAP_BLOCK_SUPER 0
+#define MAP_BLOCK_FIRST_GROUP 1
+#define MAP_GROUP_FSM_PAGES 1
+#define MAP_GROUP_VM_PAGES 1
+#define MAP_GROUP_MAIN_PAGES 8192
+#define MAP_GROUP_TOTAL_PAGES \
+ (MAP_GROUP_FSM_PAGES + MAP_GROUP_VM_PAGES + MAP_GROUP_MAIN_PAGES)
+
+/*
+ * One map page: a flat array of MAP_ENTRIES_PER_PAGE 32-bit block numbers
+ * that exactly fills a BLCKSZ block (there is no page header).
+ */
+typedef struct MapPage
+{
+ uint32 pblknos[MAP_ENTRIES_PER_PAGE];
+} MapPage;
+
+extern void MapPageInit(MapPage *page);
+extern BlockNumber MapPageGetEntry(const MapPage *page, int entry_idx);
+extern void MapPageSetEntry(MapPage *page, int entry_idx, BlockNumber pblkno);
+
+extern BlockNumber MapForkPageIndexToMapBlkno(ForkNumber forknum,
+ BlockNumber fork_page_idx);
+extern BlockNumber MapLblknoToMapBlkno(ForkNumber forknum, BlockNumber lblkno);
+extern bool MapDecodeMapBlkno(BlockNumber map_blkno, ForkNumber *forknum,
+ BlockNumber *fork_page_idx);
+
+#endif /* MAP_H */
diff --git a/src/include/storage/mapsuper.h b/src/include/storage/mapsuper.h
new file mode 100644
index 0000000000..1f6a5dca5a
--- /dev/null
+++ b/src/include/storage/mapsuper.h
@@ -0,0 +1,100 @@
+/*-------------------------------------------------------------------------
+ *
+ * mapsuper.h
+ * Umbra metadata superblock helpers.
+ *
+ * The superblock is stored in metadata block 0. The first 512 bytes of that
+ * block hold a 64-byte versioned payload ending in a CRC, followed by padding;
+ * the remainder of the block is reserved and written as zeros.
+ *
+ * src/include/storage/mapsuper.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef MAPSUPER_H
+#define MAPSUPER_H
+
+#include "access/xlogdefs.h"
+#include "port/pg_crc32c.h"
+#include "storage/block.h"
+#include "storage/smgr.h"
+
+#define MAP_SUPERBLOCK_MAGIC 0x554D4252U /* "UMBR" */
+#define MAP_SUPERBLOCK_VERSION 1U
+#define MAP_SUPERBLOCK_SIZE 512
+#define MAP_SUPERBLOCK_PAYLOAD_SIZE 64
+
+#define MAP_SUPERBLOCK_FLAG_SKIP_WAL_PENDING 0x00000001U
+
+/*
+ * On-disk superblock payload (MAP_SUPERBLOCK_PAYLOAD_SIZE bytes).
+ *
+ * The crc field must stay last: the checksum is computed over every byte that
+ * precedes it.
+ *
+ * NOTE(review): the struct relies on packing, since last_updated_lsn is an
+ * 8-byte field at offset 52.  Confirm pg_attribute_packed() is available on
+ * every compiler this header must build with.
+ */
+typedef struct pg_attribute_packed() MapSuperblockData
+{
+ /* Identity fields, checked by MapSuperblockHasValidIdentity(). */
+ uint32 magic;
+ uint32 version;
+ uint32 blcksz;
+ uint32 flags;
+
+ /* Per-fork physical allocation counters. */
+ BlockNumber next_free_phys_block_main;
+ BlockNumber phys_capacity_main;
+ BlockNumber next_free_phys_block_fsm;
+ BlockNumber phys_capacity_fsm;
+ BlockNumber next_free_phys_block_vm;
+ BlockNumber phys_capacity_vm;
+
+ /* Per-fork logical block counts. */
+ BlockNumber logical_nblocks_main;
+ BlockNumber logical_nblocks_fsm;
+ BlockNumber logical_nblocks_vm;
+
+ XLogRecPtr last_updated_lsn;
+ pg_crc32c crc; /* CRC-32C of all preceding bytes */
+} MapSuperblockData;
+
+/*
+ * The payload overlaid on a fixed MAP_SUPERBLOCK_SIZE byte area; "padding"
+ * is the view used when copying the superblock to and from a page image.
+ */
+typedef union MapSuperblock
+{
+ MapSuperblockData data;
+ char padding[MAP_SUPERBLOCK_SIZE];
+} MapSuperblock;
+
+/*
+ * Compile-time checks that pin the on-disk layout.  StaticAssertDecl gives a
+ * readable diagnostic and avoids declaring throwaway typedef names.
+ */
+StaticAssertDecl(sizeof(MapSuperblockData) == MAP_SUPERBLOCK_PAYLOAD_SIZE,
+				 "MapSuperblockData must be MAP_SUPERBLOCK_PAYLOAD_SIZE bytes");
+StaticAssertDecl(offsetof(MapSuperblockData, crc) == 60,
+				 "MapSuperblockData.crc must be at offset 60");
+StaticAssertDecl(sizeof(MapSuperblock) == MAP_SUPERBLOCK_SIZE,
+				 "MapSuperblock must be MAP_SUPERBLOCK_SIZE bytes");
+
+extern void MapSuperblockInit(MapSuperblock *super, uint32 flags);
+extern bool MapSuperblockHasValidIdentity(const MapSuperblock *super);
+extern bool MapSuperblockIsValid(const MapSuperblock *super);
+extern bool MapSuperblockCheckCRC(const MapSuperblock *super);
+extern void MapSuperblockRefreshCRC(MapSuperblock *super);
+
+extern void MapSuperblockSetFlags(MapSuperblock *super, uint32 flags);
+extern uint32 MapSuperblockGetFlags(const MapSuperblock *super);
+
+extern void MapSuperblockSetLastUpdatedLSN(MapSuperblock *super, XLogRecPtr lsn);
+extern XLogRecPtr MapSuperblockGetLastUpdatedLSN(const MapSuperblock *super);
+
+extern BlockNumber MapSuperblockGetNextFreePhysBlock(const MapSuperblock *super,
+ ForkNumber forknum);
+extern void MapSuperblockSetNextFreePhysBlock(MapSuperblock *super,
+ ForkNumber forknum,
+ BlockNumber blkno);
+
+extern BlockNumber MapSuperblockGetPhysCapacity(const MapSuperblock *super,
+ ForkNumber forknum);
+extern void MapSuperblockSetPhysCapacity(MapSuperblock *super, ForkNumber forknum,
+ BlockNumber blkno);
+
+extern BlockNumber MapSuperblockGetLogicalNblocks(const MapSuperblock *super,
+ ForkNumber forknum);
+extern void MapSuperblockSetLogicalNblocks(MapSuperblock *super, ForkNumber forknum,
+ BlockNumber nblocks);
+
+extern void MapSuperblockPackPage(const MapSuperblock *super, char page[BLCKSZ]);
+extern void MapSuperblockUnpackPage(MapSuperblock *super, const char page[BLCKSZ]);
+
+extern bool MapSBlockRead(SMgrRelation reln, MapSuperblock *super);
+extern void MapSBlockWrite(SMgrRelation reln, const MapSuperblock *super,
+ bool skipFsync);
+extern void MapSBlockInitNew(SMgrRelation reln, uint32 flags, XLogRecPtr lsn,
+ bool skipFsync);
+
+#endif /* MAPSUPER_H */
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 1076717b92..8d06d69b51 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -113,6 +113,12 @@ extern void smgrwriteback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks);
extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
extern BlockNumber smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum);
+extern void smgrcreaterelationmetadata(SMgrRelation reln);
+extern void smgrcopyrelationmetadata(SMgrRelation src, SMgrRelation dst,
+ char relpersistence);
+extern void smgrsyncrelationmetadata(SMgrRelation reln);
+extern void smgrunlinkrelationmetadata(RelFileLocatorBackend rlocator,
+ bool isRedo);
extern void smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks,
BlockNumber *old_nblocks,
BlockNumber *nblocks);
diff --git a/src/include/storage/umbra.h b/src/include/storage/umbra.h
index 30e033fcf0..2fb3c2f75e 100644
--- a/src/include/storage/umbra.h
+++ b/src/include/storage/umbra.h
@@ -34,6 +34,13 @@ extern void uminit(void);
extern void umopen(SMgrRelation reln);
extern void umclose(SMgrRelation reln, ForkNumber forknum);
extern void umdestroy(SMgrRelation reln);
+extern bool umisinternalfork(ForkNumber forknum);
+extern void umcreaterelationmetadata(SMgrRelation reln);
+extern void umcopyrelationmetadata(SMgrRelation src, SMgrRelation dst,
+ char relpersistence);
+extern void umsyncrelationmetadata(SMgrRelation reln);
+extern void umunlinkrelationmetadata(RelFileLocatorBackend rlocator,
+ bool isRedo);
extern void umcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern bool umexists(SMgrRelation reln, ForkNumber forknum);
extern void umunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo);
--
2.50.1 (Apple Git-155)
| From | Date | Subject | |
|---|---|---|---|
| Next Message | Mingwei Jia | 2026-06-01 23:33:35 | [RFC PATCH v2 RESEND 05/10] umbra: add patch 4 shared-memory MAP cache and checkpoint flush |
| Previous Message | Mingwei Jia | 2026-06-01 23:33:33 | [RFC PATCH v2 RESEND 03/10] umbra: add patch 2 umfile physical file manager and metadata storage primitives |