From 499a7bd9aea3032f03d787833c0501d9fa703271 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Mon, 13 Feb 2017 21:20:12 +0100
Subject: [PATCH] pageinspect - page_checksum and bt_page_items(bytea)

Adds two functions to the pageinspect extension:

1) page_checksum(bytea,int4) computes the checksum of an arbitrary
page image, even if data checksums are not enabled

2) bt_page_items(bytea) accepts a raw page image, like heap_page_items(bytea)
---
 contrib/pageinspect/btreefuncs.c              | 209 +++++++++++++++++++++++---
 contrib/pageinspect/expected/btree.out        |  13 ++
 contrib/pageinspect/pageinspect--1.5--1.6.sql |  22 +++
 contrib/pageinspect/rawpage.c                 |  37 +++++
 contrib/pageinspect/sql/btree.sql             |   4 +
 doc/src/sgml/pageinspect.sgml                 |  58 +++++++
 src/include/access/nbtree.h                   |   1 +
 7 files changed, 320 insertions(+), 24 deletions(-)

diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
index d50ec3a..93da844 100644
--- a/contrib/pageinspect/btreefuncs.c
+++ b/contrib/pageinspect/btreefuncs.c
@@ -39,8 +39,14 @@
 #include "utils/varlena.h"
 
 
+extern Datum bt_metap(PG_FUNCTION_ARGS);
+extern Datum bt_page_items(PG_FUNCTION_ARGS);
+extern Datum bt_page_items_bytea(PG_FUNCTION_ARGS);
+extern Datum bt_page_stats(PG_FUNCTION_ARGS);
+
 PG_FUNCTION_INFO_V1(bt_metap);
 PG_FUNCTION_INFO_V1(bt_page_items);
+PG_FUNCTION_INFO_V1(bt_page_items_bytea);
 PG_FUNCTION_INFO_V1(bt_page_stats);
 
 #define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
@@ -215,17 +221,31 @@ bt_page_stats(PG_FUNCTION_ARGS)
 		elog(ERROR, "return type must be a row type");
 
 	j = 0;
-	values[j++] = psprintf("%d", stat.blkno);
-	values[j++] = psprintf("%c", stat.type);
-	values[j++] = psprintf("%d", stat.live_items);
-	values[j++] = psprintf("%d", stat.dead_items);
-	values[j++] = psprintf("%d", stat.avg_item_size);
-	values[j++] = psprintf("%d", stat.page_size);
-	values[j++] = psprintf("%d", stat.free_size);
-	values[j++] = psprintf("%d", stat.btpo_prev);
-	values[j++] = psprintf("%d", stat.btpo_next);
-	values[j++] = psprintf("%d", (stat.type == 'd') ? stat.btpo.xact : stat.btpo.level);
-	values[j++] = psprintf("%d", stat.btpo_flags);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", stat.blkno);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%c", stat.type);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", stat.live_items);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", stat.dead_items);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", stat.avg_item_size);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", stat.page_size);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", stat.free_size);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", stat.btpo_prev);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", stat.btpo_next);
+	values[j] = palloc(32);
+	if (stat.type == 'd')
+		snprintf(values[j++], 32, "%d", stat.btpo.xact);
+	else
+		snprintf(values[j++], 32, "%d", stat.btpo.level);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", stat.btpo_flags);
 
 	tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
 								   values);
@@ -361,13 +381,18 @@ bt_page_items(PG_FUNCTION_ARGS)
 		itup = (IndexTuple) PageGetItem(uargs->page, id);
 
 		j = 0;
-		values[j++] = psprintf("%d", uargs->offset);
-		values[j++] = psprintf("(%u,%u)",
-							   BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)),
-							   itup->t_tid.ip_posid);
-		values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
-		values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
-		values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
+		values[j] = palloc(32);
+		snprintf(values[j++], 32, "%d", uargs->offset);
+		values[j] = palloc(32);
+		snprintf(values[j++], 32, "(%u,%u)",
+				 BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)),
+				 itup->t_tid.ip_posid);
+		values[j] = palloc(32);
+		snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup));
+		values[j] = palloc(32);
+		snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f');
+		values[j] = palloc(32);
+		snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
 
 		ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
 		dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
@@ -396,6 +421,136 @@ bt_page_items(PG_FUNCTION_ARGS)
 	}
 }
 
+/*-------------------------------------------------------
+ * bt_page_items_bytea()
+ *
+ * Get IndexTupleData set in a btree page, given the raw page image as bytea
+ *
+ * Usage: SELECT * FROM bt_page_items(get_raw_page('t1_pkey', 1));
+ *-------------------------------------------------------
+ */
+
+Datum
+bt_page_items_bytea(PG_FUNCTION_ARGS)
+{
+	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+	Datum		result;
+	char	   *values[6];
+	HeapTuple	tuple;
+	FuncCallContext *fctx;
+	struct user_args *uargs;
+	int			raw_page_size;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser to use pageinspect functions"))));
+
+	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		BTPageOpaque opaque;
+		MemoryContext mctx;
+		TupleDesc	tupleDesc;
+
+		/* Require exactly one block; a shorter input would be read out of bounds */
+		if (raw_page_size != BLCKSZ)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("incorrect size of input page (%d bytes)", raw_page_size)));
+
+		fctx = SRF_FIRSTCALL_INIT();
+		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+		uargs = palloc(sizeof(struct user_args));
+
+		/*
+		 * Keep a private copy in the multi-call context: the argument may be
+		 * a detoasted copy made in a context that is reset between calls.
+		 */
+		uargs->page = palloc(BLCKSZ);
+		memcpy(uargs->page, VARDATA(raw_page), BLCKSZ);
+		uargs->offset = FirstOffsetNumber;
+
+		opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
+
+		if (P_ISMETA(opaque))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("block is a meta page")));
+
+		if (P_ISDELETED(opaque))
+			elog(NOTICE, "page is deleted");
+
+		fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
+
+		/* Build a tuple descriptor for our result type */
+		if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
+			elog(ERROR, "return type must be a row type");
+
+		fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
+		fctx->user_fctx = uargs;
+
+		MemoryContextSwitchTo(mctx);
+	}
+
+	fctx = SRF_PERCALL_SETUP();
+	uargs = fctx->user_fctx;
+
+	if (fctx->call_cntr < fctx->max_calls)
+	{
+		ItemId		id;
+		IndexTuple	itup;
+		int			j;
+		int			off;
+		int			dlen;
+		char	   *dump;
+		char	   *ptr;
+
+		id = PageGetItemId(uargs->page, uargs->offset);
+
+		if (!ItemIdIsValid(id))
+			elog(ERROR, "invalid ItemId");
+
+		itup = (IndexTuple) PageGetItem(uargs->page, id);
+		j = 0;
+		values[j++] = psprintf("%d", uargs->offset);
+		values[j++] = psprintf("(%u,%u)",
+							   BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)),
+							   itup->t_tid.ip_posid);
+		values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
+		values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
+		values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
+
+		ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
+		dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
+
+		/* Hex dump of the tuple data, "xx xx ...": at most 3 chars per byte */
+		dump = palloc0(dlen * 3 + 1);
+		values[j] = dump;
+		for (off = 0; off < dlen; off++)
+		{
+			if (off > 0)
+				*dump++ = ' ';
+			sprintf(dump, "%02x", *(ptr + off) & 0xff);
+			dump += 2;
+		}
+
+		tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
+		result = HeapTupleGetDatum(tuple);
+
+		uargs->offset = uargs->offset + 1;
+
+		SRF_RETURN_NEXT(fctx, result);
+	}
+	else
+	{
+		pfree(uargs);
+		SRF_RETURN_DONE(fctx);
+	}
+}
+
 
 /* ------------------------------------------------
  * bt_metap()
@@ -453,12 +608,18 @@ bt_metap(PG_FUNCTION_ARGS)
 		elog(ERROR, "return type must be a row type");
 
 	j = 0;
-	values[j++] = psprintf("%d", metad->btm_magic);
-	values[j++] = psprintf("%d", metad->btm_version);
-	values[j++] = psprintf("%d", metad->btm_root);
-	values[j++] = psprintf("%d", metad->btm_level);
-	values[j++] = psprintf("%d", metad->btm_fastroot);
-	values[j++] = psprintf("%d", metad->btm_fastlevel);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", metad->btm_magic);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", metad->btm_version);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", metad->btm_root);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", metad->btm_level);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", metad->btm_fastroot);
+	values[j] = palloc(32);
+	snprintf(values[j++], 32, "%d", metad->btm_fastlevel);
 
 	tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
 								   values);
diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out
index 82a49e3..67b103a 100644
--- a/contrib/pageinspect/expected/btree.out
+++ b/contrib/pageinspect/expected/btree.out
@@ -42,4 +42,17 @@ data       | 01 00 00 00 00 00 00 01
 
 SELECT * FROM bt_page_items('test1_a_idx', 2);
 ERROR:  block number out of range
+SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 0));
+ERROR:  block is a meta page
+SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 1));
+-[ RECORD 1 ]-----------------------
+itemoffset | 1
+ctid       | (0,1)
+itemlen    | 16
+nulls      | f
+vars       | f
+data       | 01 00 00 00 00 00 00 01
+
+SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 2));
+ERROR:  block number 2 is out of range for relation "test1_a_idx"
 DROP TABLE test1;
diff --git a/contrib/pageinspect/pageinspect--1.5--1.6.sql b/contrib/pageinspect/pageinspect--1.5--1.6.sql
index ac39568..df17fe6 100644
--- a/contrib/pageinspect/pageinspect--1.5--1.6.sql
+++ b/contrib/pageinspect/pageinspect--1.5--1.6.sql
@@ -75,3 +75,25 @@ CREATE FUNCTION hash_metapage_info(IN page bytea,
     OUT mapp int8[])
 AS 'MODULE_PATHNAME', 'hash_metapage_info'
 LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- page_checksum()
+--
+CREATE FUNCTION page_checksum(IN page bytea, IN blkno int4)
+RETURNS smallint
+AS 'MODULE_PATHNAME', 'page_checksum'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- bt_page_items(bytea)
+--
+CREATE FUNCTION bt_page_items(IN page bytea,
+    OUT itemoffset smallint,
+    OUT ctid tid,
+    OUT itemlen smallint,
+    OUT nulls bool,
+    OUT vars bool,
+    OUT data text)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'bt_page_items_bytea'
+LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c
index 102f360..0605989 100644
--- a/contrib/pageinspect/rawpage.c
+++ b/contrib/pageinspect/rawpage.c
@@ -24,6 +24,7 @@
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
+#include "storage/checksum.h"
 #include "utils/builtins.h"
 #include "utils/pg_lsn.h"
 #include "utils/rel.h"
@@ -275,3 +276,39 @@ page_header(PG_FUNCTION_ARGS)
 
 	PG_RETURN_DATUM(result);
 }
+
+/*
+ * page_checksum
+ *
+ * Compute the checksum of a raw page, as if it were located at block blkno
+ */
+
+PG_FUNCTION_INFO_V1(page_checksum);
+
+Datum
+page_checksum(PG_FUNCTION_ARGS)
+{
+	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+	uint32		blkno = PG_GETARG_INT32(1);
+	int			raw_page_size;
+	PageHeader	page;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser to use raw page functions"))));
+
+	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
+	if (raw_page_size != BLCKSZ)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("incorrect size of input page (%d bytes)", raw_page_size)));
+
+	page = (PageHeader) VARDATA(raw_page);
+
+	/* pg_checksum_page() expects an initialized page; a new one has no checksum */
+	if (PageIsNew(page))
+		PG_RETURN_NULL();
+
+	PG_RETURN_INT16(pg_checksum_page((char *) page, blkno));
+}
diff --git a/contrib/pageinspect/sql/btree.sql b/contrib/pageinspect/sql/btree.sql
index 1eafc32..8eac64c 100644
--- a/contrib/pageinspect/sql/btree.sql
+++ b/contrib/pageinspect/sql/btree.sql
@@ -14,4 +14,8 @@ SELECT * FROM bt_page_items('test1_a_idx', 0);
 SELECT * FROM bt_page_items('test1_a_idx', 1);
 SELECT * FROM bt_page_items('test1_a_idx', 2);
 
+SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 0));
+SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 1));
+SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 2));
+
 DROP TABLE test1;
diff --git a/doc/src/sgml/pageinspect.sgml b/doc/src/sgml/pageinspect.sgml
index 5e6712f..da5dd37 100644
--- a/doc/src/sgml/pageinspect.sgml
+++ b/doc/src/sgml/pageinspect.sgml
@@ -84,6 +84,33 @@ test=# SELECT * FROM page_header(get_raw_page('pg_class', 0));
 
    <varlistentry>
     <term>
+     <function>page_checksum(page bytea, blkno int4) returns smallint</function>
+     <indexterm>
+      <primary>page_checksum</primary>
+     </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      <function>page_checksum</function> computes the checksum for the page, as if
+      it were located at the given block.
+     </para>
+
+     <para>
+      A page image obtained with <function>get_raw_page</function> should be
+      passed as argument.  For example:
+<screen>
+test=# SELECT page_checksum(get_raw_page('pg_class', 0), 100);
+ page_checksum
+---------------
+         13443
+</screen>
+    </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>
      <function>heap_page_items(page bytea) returns setof record</function>
      <indexterm>
       <primary>heap_page_items</primary>
@@ -290,6 +317,37 @@ test=# SELECT * FROM bt_page_items('pg_cast_oid_index', 1);
      </para>
     </listitem>
    </varlistentry>
+
+   <varlistentry>
+    <term>
+     <function>bt_page_items(page bytea) returns setof record</function>
+     <indexterm>
+      <primary>bt_page_items</primary>
+     </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      Similarly to <function>heap_page_items</function>, it is also possible to
+      pass the page to <function>bt_page_items</function> as a <type>bytea</>
+      value. So the last example may also be rewritten like this:
+<screen>
+test=# SELECT * FROM bt_page_items(get_raw_page('pg_cast_oid_index', 1));
+ itemoffset |  ctid   | itemlen | nulls | vars |    data
+------------+---------+---------+-------+------+-------------
+          1 | (0,1)   |      12 | f     | f    | 23 27 00 00
+          2 | (0,2)   |      12 | f     | f    | 24 27 00 00
+          3 | (0,3)   |      12 | f     | f    | 25 27 00 00
+          4 | (0,4)   |      12 | f     | f    | 26 27 00 00
+          5 | (0,5)   |      12 | f     | f    | 27 27 00 00
+          6 | (0,6)   |      12 | f     | f    | 28 27 00 00
+          7 | (0,7)   |      12 | f     | f    | 29 27 00 00
+          8 | (0,8)   |      12 | f     | f    | 2a 27 00 00
+</screen>
+      All the other details are the same as explained in the previous item.
+     </para>
+    </listitem>
+   </varlistentry>
   </variablelist>
  </sect2>
 
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 6289ffa..f202715 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -177,6 +177,7 @@ typedef struct BTMetaPageData
 #define P_ISLEAF(opaque)		((opaque)->btpo_flags & BTP_LEAF)
 #define P_ISROOT(opaque)		((opaque)->btpo_flags & BTP_ROOT)
 #define P_ISDELETED(opaque)		((opaque)->btpo_flags & BTP_DELETED)
+#define P_ISMETA(opaque)		((opaque)->btpo_flags & BTP_META)
 #define P_ISHALFDEAD(opaque)	((opaque)->btpo_flags & BTP_HALF_DEAD)
 #define P_IGNORE(opaque)		((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD))
 #define P_HAS_GARBAGE(opaque)	((opaque)->btpo_flags & BTP_HAS_GARBAGE)
-- 
2.5.5

