From 37881f01b5e30d75771e8f0888b668d368f64b20 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 14 Aug 2025 17:06:10 +0900 Subject: [PATCH v1 09/11] Add support for oid8 TOAST values This commit adds the possibility to define TOAST tables with oid8 as value ID, based on the reloption toast_value_type. All the external TOAST pointers still rely on varatt_external and a single vartag, with all the values inserted in the oid8 TOAST tables fed from the existing OID value generator. This will be changed in an upcoming patch that adds more vartag_external types and their associated structures, with the code being able to use a different external TOAST pointer depending on the attribute type of chunk_id in TOAST relations. All the changes done here are mechanical, with all the TOAST code able to do chunk ID lookups based on the two types now supported. XXX: Catalog version bump required. --- src/include/catalog/pg_opclass.dat | 3 +- src/include/utils/rel.h | 1 + src/backend/access/common/reloptions.c | 1 + src/backend/access/common/toast_internals.c | 94 +++++++++++++++------ src/backend/access/heap/heaptoast.c | 19 ++++- src/backend/catalog/toasting.c | 24 +++++- doc/src/sgml/ref/create_table.sgml | 2 + doc/src/sgml/storage.sgml | 7 +- contrib/amcheck/verify_heapam.c | 22 +++-- 9 files changed, 132 insertions(+), 41 deletions(-) diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat index df170b80840b..b84c2bb7a8c3 100644 --- a/src/include/catalog/pg_opclass.dat +++ b/src/include/catalog/pg_opclass.dat @@ -179,7 +179,8 @@ opcintype => 'xid8' }, { opcmethod => 'hash', opcname => 'oid8_ops', opcfamily => 'hash/oid8_ops', opcintype => 'oid8' }, -{ opcmethod => 'btree', opcname => 'oid8_ops', opcfamily => 'btree/oid8_ops', +{ oid => '8285', oid_symbol => 'OID8_BTREE_OPS_OID', + opcmethod => 'btree', opcname => 'oid8_ops', opcfamily => 'btree/oid8_ops', opcintype => 'oid8' }, { opcmethod => 'hash', opcname => 'cid_ops', opcfamily => 
'hash/cid_ops', opcintype => 'cid' }, diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index f3be0acd4255..3c64991533d8 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -345,6 +345,7 @@ typedef enum StdRdOptToastValueType { STDRD_OPTION_TOAST_VALUE_TYPE_INVALID = 0, STDRD_OPTION_TOAST_VALUE_TYPE_OID, + STDRD_OPTION_TOAST_VALUE_TYPE_OID8, } StdRdOptToastValueType; typedef struct StdRdOptions diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 66fd2a05538e..fda776215cdd 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -553,6 +553,7 @@ static relopt_enum_elt_def StdRdOptToastValueTypes[] = { /* no value for INVALID */ {"oid", STDRD_OPTION_TOAST_VALUE_TYPE_OID}, + {"oid8", STDRD_OPTION_TOAST_VALUE_TYPE_OID8}, {(const char *) NULL} /* list terminator */ }; diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index ca5ed8dfabca..e9ced27baf7e 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -25,6 +25,7 @@ #include "utils/fmgroids.h" #include "utils/rel.h" #include "utils/snapmgr.h" +#include "utils/lsyscache.h" static bool toastrel_valueid_exists(Relation toastrel, Oid8 valueid); static bool toastid_valueid_exists(Oid toastrelid, Oid8 valueid); @@ -131,8 +132,10 @@ toast_save_datum(Relation rel, Datum value, Pointer dval = DatumGetPointer(value); int num_indexes; int validIndex; + Oid toast_typid = get_atttype(rel->rd_rel->reltoastrelid, 1); Assert(!VARATT_IS_EXTERNAL(dval)); + Assert(OidIsValid(toast_typid)); /* * Open the toast relation and its indexes. We can use the index to check @@ -200,24 +203,32 @@ toast_save_datum(Relation rel, Datum value, toast_pointer.va_toastrelid = RelationGetRelid(toastrel); /* - * Choose an OID to use as the value ID for this toast value. 
+ * Choose a new value to use as the value ID for this toast value, be it + * for OID or oid8-based TOAST relations. * - * Normally we just choose an unused OID within the toast table. But + * Normally we just choose an unused value within the toast table. But * during table-rewriting operations where we are preserving an existing - * toast table OID, we want to preserve toast value OIDs too. So, if + * toast table OID, we want to preserve toast value IDs too. So, if * rd_toastoid is set and we had a prior external value from that same * toast table, re-use its value ID. If we didn't have a prior external * value (which is a corner case, but possible if the table's attstorage * options have been changed), we have to pick a value ID that doesn't - * conflict with either new or existing toast value OIDs. + * conflict with either new or existing toast value IDs. If the TOAST + * table uses 8-byte value IDs, a new value is assigned without + * checking for conflicts. */ if (!OidIsValid(rel->rd_toastoid)) { /* normal case: just choose an unused OID */ - toast_pointer.va_valueid = - GetNewOidWithIndex(toastrel, - RelationGetRelid(toastidxs[validIndex]), - (AttrNumber) 1); + if (toast_typid == OIDOID) + toast_pointer.va_valueid = + GetNewOidWithIndex(toastrel, + RelationGetRelid(toastidxs[validIndex]), + (AttrNumber) 1); + else if (toast_typid == OID8OID) + toast_pointer.va_valueid = GetNewObjectId8(); + else + Assert(false); } else { @@ -263,17 +274,22 @@ toast_save_datum(Relation rel, Datum value, if (toast_pointer.va_valueid == InvalidOid) { /* - * new value; must choose an OID that doesn't conflict in either - * old or new toast table + * new value; must choose a value that doesn't conflict in either + * old or new toast table. 
*/ - do + if (toast_typid == OIDOID) { - toast_pointer.va_valueid = - GetNewOidWithIndex(toastrel, - RelationGetRelid(toastidxs[validIndex]), - (AttrNumber) 1); - } while (toastid_valueid_exists(rel->rd_toastoid, - toast_pointer.va_valueid)); + do + { + toast_pointer.va_valueid = + GetNewOidWithIndex(toastrel, + RelationGetRelid(toastidxs[validIndex]), + (AttrNumber) 1); + } while (toastid_valueid_exists(rel->rd_toastoid, + toast_pointer.va_valueid)); + } + else if (toast_typid == OID8OID) + toast_pointer.va_valueid = GetNewObjectId8(); } } @@ -303,7 +319,10 @@ toast_save_datum(Relation rel, Datum value, /* * Build a tuple and store it */ - t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid); + if (toast_typid == OIDOID) + t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid); + else if (toast_typid == OID8OID) + t_values[0] = ObjectId8GetDatum(toast_pointer.va_valueid); t_values[1] = Int32GetDatum(chunk_seq++); SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ); memcpy(VARDATA(&chunk_data), data_p, chunk_size); @@ -384,6 +403,7 @@ toast_delete_datum(Relation rel, Datum value, bool is_speculative) HeapTuple toasttup; int num_indexes; int validIndex; + Oid toast_typid; if (!VARATT_IS_EXTERNAL_ONDISK(attr)) return; @@ -395,6 +415,8 @@ toast_delete_datum(Relation rel, Datum value, bool is_speculative) * Open the toast relation and its indexes */ toastrel = table_open(toast_pointer.va_toastrelid, RowExclusiveLock); + toast_typid = TupleDescAttr(toastrel->rd_att, 0)->atttypid; + Assert(toast_typid == OIDOID || toast_typid == OID8OID); /* Fetch valid relation used for process */ validIndex = toast_open_indexes(toastrel, @@ -405,10 +427,18 @@ toast_delete_datum(Relation rel, Datum value, bool is_speculative) /* * Setup a scan key to find chunks with matching va_valueid */ - ScanKeyInit(&toastkey, - (AttrNumber) 1, - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(toast_pointer.va_valueid)); + if (toast_typid == OIDOID) + ScanKeyInit(&toastkey, + (AttrNumber) 1, + 
BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(toast_pointer.va_valueid)); + else if (toast_typid == OID8OID) + ScanKeyInit(&toastkey, + (AttrNumber) 1, + BTEqualStrategyNumber, F_OID8EQ, + ObjectId8GetDatum(toast_pointer.va_valueid)); + else + Assert(false); /* * Find all the chunks. (We don't actually care whether we see them in @@ -455,6 +485,7 @@ toastrel_valueid_exists(Relation toastrel, Oid8 valueid) int num_indexes; int validIndex; Relation *toastidxs; + Oid toast_typid; /* Fetch a valid index relation */ validIndex = toast_open_indexes(toastrel, @@ -462,13 +493,24 @@ toastrel_valueid_exists(Relation toastrel, Oid8 valueid) &toastidxs, &num_indexes); + toast_typid = TupleDescAttr(toastrel->rd_att, 0)->atttypid; + Assert(toast_typid == OIDOID || toast_typid == OID8OID); + /* * Setup a scan key to find chunks with matching va_valueid */ - ScanKeyInit(&toastkey, - (AttrNumber) 1, - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(valueid)); + if (toast_typid == OIDOID) + ScanKeyInit(&toastkey, + (AttrNumber) 1, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(valueid)); + else if (toast_typid == OID8OID) + ScanKeyInit(&toastkey, + (AttrNumber) 1, + BTEqualStrategyNumber, F_OID8EQ, + ObjectId8GetDatum(valueid)); + else + Assert(false); /* * Is there any such chunk? 
diff --git a/src/backend/access/heap/heaptoast.c b/src/backend/access/heap/heaptoast.c index 81154c17376c..a93e98788da7 100644 --- a/src/backend/access/heap/heaptoast.c +++ b/src/backend/access/heap/heaptoast.c @@ -640,6 +640,7 @@ heap_fetch_toast_slice(Relation toastrel, Oid8 valueid, int32 attrsize, int num_indexes; int validIndex; int32 max_chunk_size; + Oid toast_typid; /* Look for the valid index of toast relation */ validIndex = toast_open_indexes(toastrel, @@ -647,6 +648,8 @@ heap_fetch_toast_slice(Relation toastrel, Oid8 valueid, int32 attrsize, &toastidxs, &num_indexes); + toast_typid = TupleDescAttr(toastrel->rd_att, 0)->atttypid; + Assert(toast_typid == OIDOID || toast_typid == OID8OID); max_chunk_size = TOAST_OID_MAX_CHUNK_SIZE; totalchunks = ((attrsize - 1) / max_chunk_size) + 1; @@ -655,10 +658,18 @@ heap_fetch_toast_slice(Relation toastrel, Oid8 valueid, int32 attrsize, Assert(endchunk <= totalchunks); /* Set up a scan key to fetch from the index. */ - ScanKeyInit(&toastkey[0], - (AttrNumber) 1, - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(valueid)); + if (toast_typid == OIDOID) + ScanKeyInit(&toastkey[0], + (AttrNumber) 1, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(valueid)); + else if (toast_typid == OID8OID) + ScanKeyInit(&toastkey[0], + (AttrNumber) 1, + BTEqualStrategyNumber, F_OID8EQ, + ObjectId8GetDatum(valueid)); + else + Assert(false); /* * No additional condition if fetching all chunks. 
Otherwise, use an diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 28ddbd10d357..66d483bb1be3 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -32,6 +32,7 @@ #include "nodes/makefuncs.h" #include "utils/fmgroids.h" #include "utils/rel.h" +#include "utils/lsyscache.h" #include "utils/syscache.h" static void CheckAndCreateToastTable(Oid relOid, Datum reloptions, @@ -168,6 +169,8 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, value_type = RelationGetToastValueType(rel, STDRD_OPTION_TOAST_VALUE_TYPE_OID); if (value_type == STDRD_OPTION_TOAST_VALUE_TYPE_OID) toast_chunkid_typid = OIDOID; + else if (value_type == STDRD_OPTION_TOAST_VALUE_TYPE_OID8) + toast_chunkid_typid = OID8OID; } else { @@ -200,7 +203,8 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("toast chunk_id type not set while in binary upgrade mode"))); - if (binary_upgrade_next_toast_chunk_id_typoid != OIDOID) + if (binary_upgrade_next_toast_chunk_id_typoid != OIDOID && + binary_upgrade_next_toast_chunk_id_typoid != OID8OID) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("cannot support toast chunk_id type %u in binary upgrade mode", @@ -225,6 +229,19 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, snprintf(toast_idxname, sizeof(toast_idxname), "pg_toast_%u_index", relOid); + /* + * Special case here. If OIDOldToast is defined, we need to rely on the + * existing table for the job because we do not want to create an + * inconsistent relation that would conflict with the parent and break + * the world. + */ + if (OidIsValid(OIDOldToast)) + { + toast_chunkid_typid = get_atttype(OIDOldToast, 1); + if (!OidIsValid(toast_chunkid_typid)) + elog(ERROR, "cache lookup failed for relation %u", OIDOldToast); + } + /* this is pretty painful... 
need a tuple descriptor */ tupdesc = CreateTemplateTupleDesc(3); TupleDescInitEntry(tupdesc, (AttrNumber) 1, @@ -343,7 +360,10 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, collationIds[0] = InvalidOid; collationIds[1] = InvalidOid; - opclassIds[0] = OID_BTREE_OPS_OID; + if (toast_chunkid_typid == OIDOID) + opclassIds[0] = OID_BTREE_OPS_OID; + else if (toast_chunkid_typid == OID8OID) + opclassIds[0] = OID8_BTREE_OPS_OID; opclassIds[1] = INT4_BTREE_OPS_OID; coloptions[0] = 0; diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 3da754045c04..3ed8f3581177 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -1666,6 +1666,8 @@ WITH ( MODULUS numeric_literal, REM relation for this table. By default this parameter is oid, to assign oid as attribute type to chunk_id. + This parameter can be set to oid8 to use oid8 + as attribute type for chunk_id. This parameter cannot be set for TOAST tables. diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index 6b1e00bfd38e..7afe435747d2 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -421,14 +421,15 @@ most TOAST_OID_MAX_CHUNK_SIZE bytes (by default this value is c so that four chunk rows will fit on a page, making it about 2000 bytes). Each chunk is stored as a separate row in the TOAST table belonging to the owning table. Every -TOAST table has the columns chunk_id (an OID -identifying the particular TOASTed value), +TOAST table has the columns +chunk_id (an OID or an 8-byte integer identifying +the particular TOASTed value), chunk_seq (a sequence number for the chunk within its value), and chunk_data (the actual data of the chunk). A unique index on chunk_id and chunk_seq provides fast retrieval of the values. 
A pointer datum representing an out-of-line on-disk TOASTed value therefore needs to store the OID of the -TOAST table in which to look and the OID of the specific value +TOAST table in which to look and the specific value (its chunk_id). For convenience, pointer datums also store the logical datum size (original uncompressed data length), physical stored size (different if compression was applied), and the compression method used, if diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 119a54a9404c..ad6c37eefae6 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -1878,7 +1878,11 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta) int32 expected_chunk_seq = 0; int32 last_chunk_seq; Oid8 toast_valueid; - int32 max_chunk_size = TOAST_OID_MAX_CHUNK_SIZE; + int32 max_chunk_size; + Oid toast_typid; + + toast_typid = TupleDescAttr(ctx->toast_rel->rd_att, 0)->atttypid; + max_chunk_size = TOAST_OID_MAX_CHUNK_SIZE; extsize = VARATT_EXTERNAL_OID_GET_EXTSIZE(ta->toast_pointer); last_chunk_seq = (extsize - 1) / max_chunk_size; @@ -1886,10 +1890,18 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta) /* * Setup a scan key to find chunks in toast table with matching va_valueid */ - ScanKeyInit(&toastkey, - (AttrNumber) 1, - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(ta->toast_pointer.va_valueid)); + if (toast_typid == OIDOID) + ScanKeyInit(&toastkey, + (AttrNumber) 1, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(ta->toast_pointer.va_valueid)); + else if (toast_typid == OID8OID) + ScanKeyInit(&toastkey, + (AttrNumber) 1, + BTEqualStrategyNumber, F_OID8EQ, + ObjectId8GetDatum(ta->toast_pointer.va_valueid)); + else + Assert(false); /* * Check if any chunks for this toasted object exist in the toast table, -- 2.54.0