#!/usr/bin/perl

use warnings;
use strict;

# Name (or path) of the pg_filedump executable used to generate the reports.
# The PG_FILEDUMP environment variable, when set to a non-empty value,
# overrides the built-in default so that a differently named or located
# binary can be used without editing this script.
my $PG_FILEDUMP = (defined $ENV{PG_FILEDUMP} && length $ENV{PG_FILEDUMP})
	? $ENV{PG_FILEDUMP}
	: "pg_filedump-8.1";

# Read the TOC, check every listed index against the table, then exit.
process_table();
exit();

# Drives the whole check: reads the TOC, makes sure a pg_filedump -i report
# ("<relfilenode>.dump") exists for the table and for each of its indexes,
# generating any that are missing or empty, loads the unused line pointers of
# the table and then scans every index for pointers to them.
#
# Takes no arguments and returns nothing useful.  Dies if the TOC names no
# table, if a relation file does not exist, or if a report could not be
# generated.
sub process_table {
	my ($table, @indexes) = read_toc();

	die "no table found in the TOC" unless defined $table;

	foreach my $elem ($table, @indexes) {
		die "file $elem does not exist" unless -f "$elem";

		# An existing, non-empty report is reused.  An empty one can only be
		# the leftover of a failed run, so it is generated again.
		next if -s "$elem.dump";

		print STDERR "generating pg_filedump for $elem\n";
		my $status = system("$PG_FILEDUMP -i $elem > $elem.dump");

		# The shell creates the output file even when the command cannot be
		# run.  Such an empty report must not be left behind, or the index
		# would later be scanned against nothing and look clean.
		if (!-s "$elem.dump") {
			unlink "$elem.dump";
			die "could not generate $elem.dump with $PG_FILEDUMP ".
				"(wait status $status)";
		}

		# Some output was produced; keep it, but do not hide the failure.
		if ($status != 0) {
			print STDERR "warning: $PG_FILEDUMP returned wait status ".
				"$status for $elem; $elem.dump may be incomplete\n";
		}
	}

	print STDERR "loading unused line pointers for table $table\n";
	my $unused = get_heap_unused($table);

	foreach my $index (@indexes) {
		print STDERR "processing index $index\n";
		process_index($unused, $index);
	}
}

# Reads a "toc" file, which is a description of a table and its indexes.  A
# table line is "table: xxx" where xxx is the relfilenode of the table, and an
# index line is "index: xxx" where xxx is the relfilenode of the index.
# Trailing whitespace (including the CR of a CRLF line ending) is accepted;
# any other line is ignored.  If several table lines are present the last one
# wins.
#
# Input comes from the <> operator, i.e. the files named in @ARGV, or stdin
# when @ARGV is empty.
#
# Returns the list (table, index, index, ...).  Dies if no table line was
# found, since nothing can be checked without a table.
sub read_toc {
	my $table;
	my @indexes;

	print STDERR "now expecting a TOC in stdin...\n";
	while (my $line = <>) {
		if ($line =~ /^table: ([0-9]+)\s*$/) {
			$table = $1;
		}
		elsif ($line =~ /^index: ([0-9]+)\s*$/) {
			push @indexes, $1;
		}
	}

	die "no \"table: <relfilenode>\" line found in the TOC"
		unless defined $table;

	print STDERR "finished reading the TOC for table $table\n";
	print STDERR ("indexes: ", join(", ", @indexes), "\n");
	return $table, @indexes;
}

# Reads a pg_filedump -i report for the given table, and fetches into a hash
# all the line pointers that are marked "unused" (item lines whose flags are
# printed as 0x00).
#
# Parameter: the relfilenode of the table; the report is read from
# "<relfilenode>.dump".
#
# Returns a hash reference mapping a block number to an array reference of
# the unused item numbers found in that block, in file order.  Blocks without
# unused items have no entry.  Dies if the report cannot be opened.
sub get_heap_unused {
	my $table = shift;
	my $curblock;
	my $unused = {};

	my $infile = "$table.dump";
	open(my $in, "<", $infile) or die "can't open $infile: $!";
	while (my $line = <$in>) {
		if ($line =~ /^Block\s+([0-9]+) /) {
			$curblock = $1;
			next;
		}

		# An item can only be attributed to a block once a block header has
		# been seen; anything before that is not usable.
		next unless defined $curblock;

		if ($line =~ /^ Item\s+([0-9]+) -- Length:.*Flags: 0x00/) {
			push @{$unused->{$curblock}}, $1;
		}
	}
	close($in);

	return $unused;
}

# Given an index and a hash built by get_heap_unused, reads the pg_filedump -i
# report for that index and prints (through report_broken_block) a list of all
# index pointers in leaf blocks that point to any element in the hash.
#
# Parameters:
#   $unused - hash reference as returned by get_heap_unused
#   $index  - relfilenode of the index; the report is read from "$index.dump"
#
# A block starts at its "Block N" header.  The lines following "<Data> --"
# are collected until "<Special Section> -----" is seen, and the block counts
# as a leaf when a Flags line of its special section mentions LEAF.  A block
# is processed at the blank line that ends its special section, or at the next
# block header or at end of file when that blank line is missing.
#
# Dies if the report cannot be opened.
sub process_index {
	my $unused = shift;
	my $index = shift;

	my $curblock;
	my $special = 0;
	my $isleaf = 0;
	my $collect = 0;
	my @lines = ();

	my $infile = "$index.dump";

	# Reports the block gathered so far if its special section marked it as
	# a leaf, then clears all per-block state so that nothing collected for
	# one block can be attributed to the next one.
	my $finish_block = sub {
		if ($special && $isleaf) {
			report_broken_block($unused,
				$index, $curblock, @lines);
		}
		$isleaf = 0;
		$special = 0;
		$collect = 0;
		@lines = ();
	};

	open(my $in, "<", $infile) or die "can't open $infile: $!";
	while (my $line = <$in>) {
		if ($line =~ /^Block\s+([0-9]+) /) {
			# Close the previous block under its own number before
			# switching to the new one.
			$finish_block->();
			$curblock = $1;
			next;
		}
		if ($line =~ /^<Data> --/) {
			$collect = 1;
			next;
		}

		if ($line =~ /^<Special Section> -----$/) {
			# The data entries end where the special section starts.
			$special = 1;
			$collect = 0;
			next;
		}

		if ($collect) {
			push @lines, $line;
		}

		if ($special) {
			if ($line =~ /^  Flags.*LEAF/) {
				$isleaf = 1;
			}

			# A blank line terminates the special section, and with it
			# the block.
			if ($line =~ /^$/) {
				$finish_block->();
			}
		}
	}
	close($in);

	# The report may end right after the last special section, without the
	# terminating blank line; that block must not be lost.
	$finish_block->();
}

# Workhorse for process_index.
#
# Parameters:
#   $unused  - hash reference mapping a heap block number to an array
#              reference of unused item numbers in that block
#   $indexid - identifier of the index, used only in the printed message
#   $block   - number of the index block the lines belong to
#   the rest - the Data lines of that index block
#
# Prints one "INDEX PTR TO UNUSED HEAP" line on the currently selected output
# handle for every "Block Id / linp Index" pair that is listed in $unused.
# The item number shown is the one of the latest " Item" line seen.
sub report_broken_block {
	my ($unused, $indexid, $block, @entries) = @_;
	my $item;

	ENTRY:
	foreach my $entry (@entries) {
		# Remember which index item the following pointer belongs to.
		$item = $1 if $entry =~ /^ Item\s+([0-9]+)/;

		next ENTRY
			unless $entry =~ /^  Block Id:\s+([0-9]+)\s+linp Index: ([0-9]+)/;
		my ($blk, $lp) = ($1, $2);

		# Heap blocks without any unused line pointer have no entry.
		my $heap_lps = $unused->{$blk};
		next ENTRY unless defined $heap_lps;

		foreach my $hlp (grep { $lp eq $_ } @{$heap_lps}) {
			print "INDEX PTR TO UNUSED HEAP: ".
				"index $indexid ($block,$item) -> ($blk, $lp)\n";
		}
	}
}
