#!/usr/bin/perl5

#++
# NAME
#	fshash 1
# SUMMARY
#	compute per-block MD5 hashes for file system blocks
# SYNOPSIS
# .ad
# .fi
#	\fBfshash\fR [\fB-b \fIblock_size\fR] [\fB-d \fIdirectory\fR]
#		[\fB-h \fIhash_size\fR] [\fB-i \fIident_string\fR]
#		[\fB-p \fIcommand\fR] [\fB-t \fItimeout\fR] [\fIdevice\fR ...]
# DESCRIPTION
#	The \fBfshash\fR command computes MD5 hashes for successive
#	file system blocks. By default, this command examines all
#	currently mounted file systems.
#
#	The result for each device is named after the device
#	(\fBfshash.\fIdevice\fR), with \fB/\fR replaced by underscore.
#	However, by default all results are sent to the standard output 
#	stream and nothing is stored.
#
#	See the OUTPUT FORMAT section below for output format details.
# .PP
#	Arguments:
# .IP "\fB-b \fIblock_size\fR"
#	The number of bytes over which an MD5 checksum is computed.
#	The default value is set with the \fB$hash_block_size\fR parameter
#	in the \fBcoroner.cf\fR file.
# .IP "\fB-d \fIdirectory\fR"
#	The directory for storing the result as one file per device.
# .IP "\fB-h \fIhash_size\fR"
#	The number of bits of each MD5 checksum that are written to output.
#	This must be a multiple of 8.   The default value is set with the
#	\fB$hash_sum_size\fR parameter in the \fBcoroner.cf\fR file.
# .IP "\fB-i \fIident_string\fR"
#	Identification string. See the \fB-p\fR option. The default value
#	is UNIX time in seconds.
# .IP "\fB-p \fIcommand\fR"
#	Pipe the result for each \fIdevice\fR into a separate instance
#	of the specified \fIcommand\fR. The data format consists of one line:
# .sp
# .ti +4
#	\fB#! \fIident_string\fR \fIfilename\fR
# .sp
#	with the (relative) result file name, followed by the actual result.
# .IP "\fB-t \fItimeout\fR"
#	Per-device time limit. The default value is specified in the
#	\fBcoroner.cf\fR file with the \fB$timeouts{$FSSUM}\fR parameter.
# .IP \fIdevice\fR
#	Zero or more devices that contain a file system. If no \fIdevice\fR
#	is specified, all mounted devices are examined.
# OUTPUT FORMAT
# .ad
# .fi
#	The output format is in time machine format, as described in
#	tm-format(5).
#
#	For every \fIdevice\fR, the output begins with a two-line header that
#	describes the data origin, device name and hash block size.
#
#	The information is followed by a one-line header that lists the names
#	of the data attributes that make up the remainder of the output:
# .IP \fBstatus\fR
#	Status of the block: \fBa\fR (allocated), \fBf\fR (free),
#	\fBo\fR (other, either a bitmap or inode block).
# .IP \fBhash\fR
#	The MD5 checksum.
# SEE ALSO
#	tm-format(5), time machine file format.
#	fssum(1), per-block MD5 checksums for file systems
# LICENSE
#	This software is distributed under the IBM Public License.
# AUTHOR(S)
#	Wietse Venema
#	IBM T.J. Watson Research
#	P.O. Box 704
#	Yorktown Heights, NY 10598, USA
#--

#
# Initialize. Allow TCT_HOME to be specified via the environment.
# This requires some ugly code layout so that the reconfig utility
# will do the right thing. Turn off scaffolding of library routines.
#
# NOTE(review): do not reformat the if/else below. Presumably reconfig
# locates and rewrites the stand-alone `$TCT_HOME = "";' line; confirm
# against the reconfig utility before touching the layout.
#
if ($ENV{'TCT_HOME'}) { $TCT_HOME = $ENV{'TCT_HOME'}; } else {
$TCT_HOME = "";
}

# Global flags. $verbose is replaced by the value of the -v option once
# the command line has been parsed; $debug and $running_under_grave_robber
# are not referenced again in this file.
# NOTE(review): presumably they are read by the library routines that are
# loaded with require -- confirm.
$debug = 0;
$verbose = 0;
$running_under_grave_robber = 1;

#
# Read configuration info and load support routines.
#
require "$TCT_HOME/conf/coroner.cf";
require "logger.pl";
require "tm_misc.pl";
require "paths.pl";
require "getopts.pl";
require "hostname.pl";
require "ostype.pl";
require "rawdev.pl";

#
# Set up logging before doing any real work: &log_item is called further
# down, each time the result for a device is piped into a command.
# NOTE(review): $logfile is presumably set by one of the files loaded
# with require -- confirm.
#
&log_init_path($logfile);

#
# Parse JCL. Take defaults from the coroner.cf file, if specified.
#
# The $opt_* variables are preset with their defaults so that the usage
# message can show them; Getopts overwrites the ones given on the
# command line. $hostname is only referenced by the commented-out
# tm_fprint header code in the per-device loop.
#
chop($hostname = &hostname());
$opt_b = $hash_block_size;
$opt_d = undef;
$opt_h = $hash_sum_size;
$opt_i = time();
$opt_p = undef;
$opt_t = $timeouts{$FSSUM};

$usage = "Usage: $0 [options] [device ...]\
    -b block_size: block size for hashing (default: $opt_b)\
    -d directory: storage for result files\
    -h hash_size: bits per hash (default: $opt_h)\
    -i ident_string: for use with -p (default: $opt_i)\
    -p command: postprocessor\
    -t timeout: per-device time limit (default: $opt_t)\
    -v: verbose mode\n";

&Getopts("b:d:h:i:p:t:v") || die $usage;

#
# Sanity checks. The hash size is a number of bits and must be a
# multiple of 8 (see the -h option in the manual page above).
#
die "fshash: bad block size: $opt_b\n" unless $opt_b > 0;
die "fshash: bad hash size: $opt_h\n" unless $opt_h > 0;
die "fshash: hash size must be a multiple of 8: $opt_h\n"
    unless $opt_h % 8 == 0;
$verbose = $opt_v;

# Store the per-device time limit back under the $FSSUM key.
# NOTE(review): presumably read by the command execution library -- confirm.
$timeouts{$FSSUM} = $opt_t;

#
# A device, a device, a kingdom for a device.
#
# Build @devices: either the devices named on the command line, or one
# entry per mounted file system as reported by the df command.
#
if (@ARGV > 0) {

    @devices = @ARGV;

} else {

    #
    # Find out the df command syntax. This is system dependent.
    #
    &determine_os();

    die "fshash: No df command configured!\n" unless $DF && -x $DF;

    if ($OS =~ /FREEBSD[2-4]/ || $OS eq "LINUX2" || $OS =~ /BSDI[2-4]/
	|| $OS eq "OPENBSD2" || $OS eq "SUNOS4") {
	@df = $DF;
    } elsif ($OS eq "SUNOS5") {
	@df = ($DF, "-k");
    } else {
	die "fshash: Don't know df syntax for $OS\n";
    }

    #
    # Find out all mounted file systems. Check the result of pipe_command
    # the same way as for the fssum command in the per-device loop.
    #
    &pipe_command(DF, @df, "-|")
	|| die "fshash: Cannot execute $DF: $!\n";
    while (<DF>) {
	# Skip the column header line.
	next if (/Filesystem/);
	# A line without any space holds only a (long) device name; the
	# remaining columns are on the next line, so glue the two together.
	$_ .= <DF> unless (/ /);
	chomp;
	# Only entries that start with a pathname, and never /proc.
	next unless /^\// && !/^\/proc\s+/;
	# The device name is the first column; the other columns are unused.
	($device) = split(/\s+/, $_);
	$device = &rawdev($device);
	print "device: $device\n"
	    if $verbose;
	push(@devices, $device);
    }
    close(DF);
}

#
# Iterate over all file systems and copy the output of the $FSSUM
# command for each device to its destination:
#
#   -p command   a fresh instance of the command per device, preceded
#                by a "#! ident_string filename" line;
#   -d directory the file fshash.<device> in that directory;
#   otherwise    the standard output stream.
#
# The -p option takes precedence when both -p and -d are specified.
#
# NOTE(review): the verbose "Examining:" message goes to the standard
# output stream, where it mixes with the results when neither -p nor -d
# is given -- confirm whether that is intended.
#
for $device (@devices) {
    print "Examining: $device\n" if $verbose;

    # Result name: the device name with each "/" replaced by "_".
    ($pretty_device = $device) =~ tr/\//_/;

    if ($opt_p) {
	$output_file = "fshash.$pretty_device";
	&log_item("fshash: piping output into command \"$opt_p\"");
	open(OUTPUT, "|$opt_p")
	    || die "fshash: Cannot execute command \"$opt_p\": $!\n";
	print OUTPUT "#! $opt_i $output_file\n";
	$output = OUTPUT;
    } elsif ($opt_d) {
	$output_file = "$opt_d/fshash.$pretty_device";
	open(OUTPUT, ">$output_file")
	    || die "fshash: Cannot create $output_file: $!\n";
	$output = OUTPUT;
    } else {
	$output = STDOUT;
    }

    # The time machine header lines are not written here.
    # NOTE(review): presumably the $FSSUM command emits them itself --
    # confirm before deleting the commented-out code below.
    # &tm_fprint($output, "class", "host", "start_time", "file", "blocksize");
    # &tm_fprint($output, "fshash", $hostname, time(), $device, $opt_b);
    # &tm_fprint($output, "status", "hash");
    &pipe_command(FSSUM, "$FSSUM", "-b$opt_b", "-h$opt_h", $device, "-|")
	|| die "fshash: Cannot execute $FSSUM: $!\n";
    while (<FSSUM>) {
	print $output $_;
    }
    close(FSSUM);
    # XXX This error test does not work!!
    die "fshash: $FSSUM terminated with status $?\n" if $?;

    # Buffered write errors surface when the handle is closed, so the
    # result of close must be checked. STDOUT is left open.
    if ($opt_d || $opt_p) {
	close(OUTPUT) || die "fshash: output write error: $!\n";
    }
}
exit 0;
