Skip site navigation (1) Skip section navigation (2)

Re: [HACKERS] pg_freespacemap question

From: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
To: Mark Kirkwood <markir(at)paradise(dot)net(dot)nz>
Cc: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>, Tatsuo Ishii <ishii(at)sraoss(dot)co(dot)jp>, alvherre(at)commandprompt(dot)com, peter_e(at)gmx(dot)net, pgsql-hackers(at)postgresql(dot)org, pgsql-patches(at)postgresql(dot)org
Subject: Re: [HACKERS] pg_freespacemap question
Date: 2006-04-26 22:41:25
Message-ID: 200604262241.k3QMfPN19697@candle.pha.pa.us (view raw or flat)
Thread:
Lists: pgsql-hackerspgsql-patches
Patch applied.  Thanks.

---------------------------------------------------------------------------

Mark Kirkwood wrote:
> Tom Lane wrote:
> > Mark Kirkwood <markir(at)paradise(dot)net(dot)nz> writes:
> > 
> >>>Good points! I had not noticed this test case. Probably NULL is better 
> > 
> > 
> >>Would setting it to 'BLCKSZ - (fixed index header stuff)' be better, 
> > 
> > 
> > No, I don't think so, because that will just make it harder to recognize
> > what's what (remember that BLCKSZ isn't really a constant, and the index
> > overhead is not the same for all AMs either).  The point here is that
> > for indexes the FSM tracks whole-page availability, not the amount of
> > free space within pages.  So I think NULL is a reasonable representation
> > of that.  Using NULL will make it easy to filter the results if you want
> > to see only heap-page data or only index-page data, whereas it will be
> > very hard to do that if the view adopts an ultimately-artificial
> > convention about the amount of available space on an index page.
> > 
> 
> Right - after suggesting it I realized that coding the different index
> overhead for each possible AM would have been ... difficult :-). A patch
> is attached to implement the NULL free bytes and other recommendations:
> 
> 1/ Index free bytes set to NULL
> 2/ Comment added to the README briefly mentioning the index business
> 3/ Columns reordered more logically
> 4/ 'Blockid' column removed
> 5/ Free bytes column renamed to just 'bytes' instead of 'blockfreebytes'
> 
> Now 5/ was only hinted at, but seemed worth doing while I was there
> (hopefully I haven't made it too terse now....).
> 
> cheers
> 
> Mark
> 
> 

> Index: pg_freespacemap.c
> ===================================================================
> RCS file: /projects/cvsroot/pgsql/contrib/pg_freespacemap/pg_freespacemap.c,v
> retrieving revision 1.2
> diff -c -r1.2 pg_freespacemap.c
> *** pg_freespacemap.c	14 Feb 2006 15:03:59 -0000	1.2
> --- pg_freespacemap.c	9 Mar 2006 03:38:10 -0000
> ***************
> *** 12,18 ****
>   #include "storage/freespace.h"
>   #include "utils/relcache.h"
>   
> ! #define		NUM_FREESPACE_PAGES_ELEM 	6
>   
>   #if defined(WIN32) || defined(__CYGWIN__)
>   /* Need DLLIMPORT for some things that are not so marked in main headers */
> --- 12,18 ----
>   #include "storage/freespace.h"
>   #include "utils/relcache.h"
>   
> ! #define		NUM_FREESPACE_PAGES_ELEM 	5
>   
>   #if defined(WIN32) || defined(__CYGWIN__)
>   /* Need DLLIMPORT for some things that are not so marked in main headers */
> ***************
> *** 29,40 ****
>   typedef struct
>   {
>   
> - 	uint32				blockid;
> - 	uint32				relfilenode;
>   	uint32				reltablespace;
>   	uint32				reldatabase;
>   	uint32				relblocknumber;
> ! 	uint32				blockfreebytes;
>   
>   }	FreeSpacePagesRec;
>   
> --- 29,40 ----
>   typedef struct
>   {
>   
>   	uint32				reltablespace;
>   	uint32				reldatabase;
> + 	uint32				relfilenode;
>   	uint32				relblocknumber;
> ! 	uint32				bytes;
> ! 	bool				isindex;
>   
>   }	FreeSpacePagesRec;
>   
> ***************
> *** 91,107 ****
>   
>   		/* Construct a tuple to return. */
>   		tupledesc = CreateTemplateTupleDesc(NUM_FREESPACE_PAGES_ELEM, false);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 1, "blockid",
> ! 						   INT4OID, -1, 0);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
>   						   OIDOID, -1, 0);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
>   						   OIDOID, -1, 0);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
>   						   OIDOID, -1, 0);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relblocknumber",
>   						   INT8OID, -1, 0);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 6, "blockfreebytes",
>   						   INT4OID, -1, 0);
>   
>   		/* Generate attribute metadata needed later to produce tuples */
> --- 91,105 ----
>   
>   		/* Construct a tuple to return. */
>   		tupledesc = CreateTemplateTupleDesc(NUM_FREESPACE_PAGES_ELEM, false);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 1, "reltablespace",
>   						   OIDOID, -1, 0);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 2, "reldatabase",
>   						   OIDOID, -1, 0);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 3, "relfilenode",
>   						   OIDOID, -1, 0);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 4, "relblocknumber",
>   						   INT8OID, -1, 0);
> ! 		TupleDescInitEntry(tupledesc, (AttrNumber) 5, "bytes",
>   						   INT4OID, -1, 0);
>   
>   		/* Generate attribute metadata needed later to produce tuples */
> ***************
> *** 129,135 ****
>   		fctx->values[2] = (char *) palloc(3 * sizeof(uint32) + 1);
>   		fctx->values[3] = (char *) palloc(3 * sizeof(uint32) + 1);
>   		fctx->values[4] = (char *) palloc(3 * sizeof(uint32) + 1);
> - 		fctx->values[5] = (char *) palloc(3 * sizeof(uint32) + 1);
>   
>   
>   		/* Return to original context when allocating transient memory */
> --- 127,132 ----
> ***************
> *** 158,169 ****
>   				for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
>   				{
>   
> - 					fctx->record[i].blockid = i;
> - 					fctx->record[i].relfilenode = fsmrel->key.relNode;
>   					fctx->record[i].reltablespace = fsmrel->key.spcNode;
>   					fctx->record[i].reldatabase = fsmrel->key.dbNode;
>   					fctx->record[i].relblocknumber = IndexFSMPageGetPageNum(page);	
> ! 					fctx->record[i].blockfreebytes = 0;	/* index.*/
>   
>   					page++;
>   					i++;
> --- 155,166 ----
>   				for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
>   				{
>   
>   					fctx->record[i].reltablespace = fsmrel->key.spcNode;
>   					fctx->record[i].reldatabase = fsmrel->key.dbNode;
> + 					fctx->record[i].relfilenode = fsmrel->key.relNode;
>   					fctx->record[i].relblocknumber = IndexFSMPageGetPageNum(page);	
> ! 					fctx->record[i].bytes = 0;	
> ! 					fctx->record[i].isindex = true;	
>   
>   					page++;
>   					i++;
> ***************
> *** 178,189 ****
>   
>   				for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
>   				{
> - 					fctx->record[i].blockid = i;
> - 					fctx->record[i].relfilenode = fsmrel->key.relNode;
>   					fctx->record[i].reltablespace = fsmrel->key.spcNode;
>   					fctx->record[i].reldatabase = fsmrel->key.dbNode;
>   					fctx->record[i].relblocknumber = FSMPageGetPageNum(page);
> ! 					fctx->record[i].blockfreebytes = FSMPageGetSpace(page);	
>   					
>   					page++;
>   					i++;
> --- 175,186 ----
>   
>   				for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
>   				{
>   					fctx->record[i].reltablespace = fsmrel->key.spcNode;
>   					fctx->record[i].reldatabase = fsmrel->key.dbNode;
> + 					fctx->record[i].relfilenode = fsmrel->key.relNode;
>   					fctx->record[i].relblocknumber = FSMPageGetPageNum(page);
> ! 					fctx->record[i].bytes = FSMPageGetSpace(page);	
> ! 					fctx->record[i].isindex = false;	
>   					
>   					page++;
>   					i++;
> ***************
> *** 209,227 ****
>   	if (funcctx->call_cntr < funcctx->max_calls)
>   	{
>   		uint32		i = funcctx->call_cntr;
>   
>   
> - 		sprintf(fctx->values[0], "%u", fctx->record[i].blockid);
> - 		sprintf(fctx->values[1], "%u", fctx->record[i].relfilenode);
> - 		sprintf(fctx->values[2], "%u", fctx->record[i].reltablespace);
> - 		sprintf(fctx->values[3], "%u", fctx->record[i].reldatabase);
> - 		sprintf(fctx->values[4], "%u", fctx->record[i].relblocknumber);
> - 		sprintf(fctx->values[5], "%u", fctx->record[i].blockfreebytes);
>   
>   
>   
>   		/* Build and return the tuple. */
> ! 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, fctx->values);
>   		result = HeapTupleGetDatum(tuple);
>   
>   
> --- 206,246 ----
>   	if (funcctx->call_cntr < funcctx->max_calls)
>   	{
>   		uint32		i = funcctx->call_cntr;
> + 		char		*values[NUM_FREESPACE_PAGES_ELEM];
> + 		int			j;
>   
> + 		/*
> + 		 * Use a temporary values array, initially pointing to fctx->values,
> + 		 * so it can be reassigned w/o losing the storage for subsequent
> + 		 * calls.
> + 		 */
> + 		for (j = 0; j < NUM_FREESPACE_PAGES_ELEM; j++)
> + 		{
> + 			values[j] = fctx->values[j];
> + 		}
> + 
> + 
> + 		sprintf(values[0], "%u", fctx->record[i].reltablespace);
> + 		sprintf(values[1], "%u", fctx->record[i].reldatabase);
> + 		sprintf(values[2], "%u", fctx->record[i].relfilenode);
> + 		sprintf(values[3], "%u", fctx->record[i].relblocknumber);
>   
>   
> + 		/*
> + 		 * Set (free) bytes to NULL for an index relation.
> + 		 */
> + 		if (fctx->record[i].isindex == true)
> + 		{
> + 			values[4] = NULL;
> + 		}
> + 		else
> + 		{
> + 			sprintf(values[4], "%u", fctx->record[i].bytes);
> + 		}
>   
>   
>   		/* Build and return the tuple. */
> ! 		tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
>   		result = HeapTupleGetDatum(tuple);
>   
>   
> Index: pg_freespacemap.sql.in
> ===================================================================
> RCS file: /projects/cvsroot/pgsql/contrib/pg_freespacemap/pg_freespacemap.sql.in,v
> retrieving revision 1.2
> diff -c -r1.2 pg_freespacemap.sql.in
> *** pg_freespacemap.sql.in	27 Feb 2006 16:09:48 -0000	1.2
> --- pg_freespacemap.sql.in	9 Mar 2006 03:42:15 -0000
> ***************
> *** 11,17 ****
>   -- Create a view for convenient access.
>   CREATE VIEW pg_freespacemap AS
>   	SELECT P.* FROM pg_freespacemap() AS P
> !  	(blockid int4, relfilenode oid, reltablespace oid, reldatabase oid, relblocknumber int8, blockfreebytes int4);
>    
>   -- Don't want these to be available at public.
>   REVOKE ALL ON FUNCTION pg_freespacemap() FROM PUBLIC;
> --- 11,17 ----
>   -- Create a view for convenient access.
>   CREATE VIEW pg_freespacemap AS
>   	SELECT P.* FROM pg_freespacemap() AS P
> !  	(reltablespace oid, reldatabase oid, relfilenode oid, relblocknumber int8, bytes int4);
>    
>   -- Don't want these to be available at public.
>   REVOKE ALL ON FUNCTION pg_freespacemap() FROM PUBLIC;
> Index: README.pg_freespacemap
> ===================================================================
> RCS file: /projects/cvsroot/pgsql/contrib/pg_freespacemap/README.pg_freespacemap,v
> retrieving revision 1.1
> diff -c -r1.1 README.pg_freespacemap
> *** README.pg_freespacemap	12 Feb 2006 03:55:53 -0000	1.1
> --- README.pg_freespacemap	9 Mar 2006 03:43:16 -0000
> ***************
> *** 34,45 ****
>   
>          Column     |  references          | Description
>     ----------------+----------------------+------------------------------------
> -    blockid        |                      | Id, 1.. max_fsm_pages
> -    relfilenode    | pg_class.relfilenode | Refilenode of the relation.
>      reltablespace  | pg_tablespace.oid    | Tablespace oid of the relation.
>      reldatabase    | pg_database.oid      | Database for the relation.
>      relblocknumber |                      | Offset of the page in the relation.
> !    blockfreebytes |                      | Free bytes in the block/page.
>   
>   
>     There is one row for each page in the free space map.
> --- 34,45 ----
>   
>          Column     |  references          | Description
>     ----------------+----------------------+------------------------------------
>      reltablespace  | pg_tablespace.oid    | Tablespace oid of the relation.
>      reldatabase    | pg_database.oid      | Database for the relation.
> +    relfilenode    | pg_class.relfilenode | Refilenode of the relation.
>      relblocknumber |                      | Offset of the page in the relation.
> !    bytes          |                      | Free bytes in the block/page, or NULL
> !                   |                      | for an index page (see below).
>   
>   
>     There is one row for each page in the free space map.
> ***************
> *** 47,52 ****
> --- 47,55 ----
>     Because the map is shared by all the databases, there are pages from
>     relations not belonging to the current database.
>   
> +   The free space map can contain pages for btree indexes if they were emptied 
> +   by a vacuum process. The bytes field is set to NULL in this case.
> + 
>     When the pg_freespacemap view is accessed, internal free space map locks are
>     taken, and a copy of the map data is made for the view to display. 
>     This ensures that the view produces a consistent set of results, while not 
> ***************
> *** 58,91 ****
>   -------------
>   
>     regression=# \d pg_freespacemap
> !       View "public.pg_freespacemap"
>         Column     |  Type   | Modifiers 
> !   ---------------+---------+-----------
> !   blockid        | integer | 
> !   relfilenode    | oid     | 
>     reltablespace  | oid     | 
>     reldatabase    | oid     | 
>     relblocknumber | bigint  | 
> !   blockfreebytes | integer | 
>    View definition:
> !   SELECT p.blockid, p.relfilenode, p.reltablespace, p.reldatabase, p.relblocknumber, p.blockfreebytes
> !     FROM pg_freespacemap() p(blockid integer, relfilenode oid, reltablespace oid, reldatabase oid, relblocknumber bigint, blockfreebytes integer);
>   
> !   regression=# SELECT c.relname, m.relblocknumber, m.blockfreebytes  
>                  FROM pg_freespacemap m INNER JOIN pg_class c 
>                  ON c.relfilenode = m.relfilenode LIMIT 10;
> !       relname             | relblocknumber | blockfreebytes 
> !   ------------------------+----------------+----------------
> !   sql_features            |              5 |           2696
> !   sql_implementation_info |              0 |           7104
> !   sql_languages           |              0 |           8016
> !   sql_packages            |              0 |           7376
> !   sql_sizing              |              0 |           6032
> !   pg_authid               |              0 |           7424
> !   pg_toast_2618           |             13 |           4588
> !   pg_toast_2618           |             12 |           1680
> !   pg_toast_2618           |             10 |           1436
> !   pg_toast_2618           |              7 |           1136
>     (10 rows)
>   
>     regression=# 
> --- 61,93 ----
>   -------------
>   
>     regression=# \d pg_freespacemap
> !      View "public.pg_freespacemap"
>         Column     |  Type   | Modifiers 
> !  ----------------+---------+-----------
>     reltablespace  | oid     | 
>     reldatabase    | oid     | 
> +   relfilenode    | oid     | 
>     relblocknumber | bigint  | 
> !   bytes          | integer | 
>    View definition:
> !  SELECT p.reltablespace, p.reldatabase, p.relfilenode, p.relblocknumber, p.bytes
> !  FROM pg_freespacemap() p(reltablespace oid, reldatabase oid, relfilenode oid, relblocknumber bigint, bytes integer);
>   
> !   regression=# SELECT c.relname, m.relblocknumber, m.bytes  
>                  FROM pg_freespacemap m INNER JOIN pg_class c 
>                  ON c.relfilenode = m.relfilenode LIMIT 10;
> !       relname             | relblocknumber |  bytes 
> !   ------------------------+----------------+--------
> !   sql_features            |              5 |   2696
> !   sql_implementation_info |              0 |   7104
> !   sql_languages           |              0 |   8016
> !   sql_packages            |              0 |   7376
> !   sql_sizing              |              0 |   6032
> !   pg_authid               |              0 |   7424
> !   pg_toast_2618           |             13 |   4588
> !   pg_toast_2618           |             12 |   1680
> !   pg_toast_2618           |             10 |   1436
> !   pg_toast_2618           |              7 |   1136
>     (10 rows)
>   
>     regression=# 
> 
> 

> 
> ---------------------------(end of broadcast)---------------------------
> TIP 9: In versions below 8.0, the planner will ignore your desire to
>        choose an index scan if your joining column's datatypes do not
>        match

-- 
  Bruce Momjian   http://candle.pha.pa.us
  EnterpriseDB    http://www.enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +

In response to

pgsql-hackers by date

Next:From: Tom LaneDate: 2006-04-26 22:42:53
Subject: Re: Catalog Access (was: [GENERAL] Concurrency problem
Previous:From: Bruce MomjianDate: 2006-04-26 22:39:09
Subject: Re: [HACKERS] Enhanced containment selectivity function

pgsql-patches by date

Next:From: Bruce MomjianDate: 2006-04-26 22:46:18
Subject: Re: [HACKERS] pg_freespacemap question
Previous:From: Bruce MomjianDate: 2006-04-26 22:39:09
Subject: Re: [HACKERS] Enhanced containment selectivity function

Privacy Policy | About PostgreSQL
Copyright © 1996-2014 The PostgreSQL Global Development Group