#!/usr/bin/perl5

#++
# NAME
#	swaphash 1
# SUMMARY
#	compute per-block MD5 hashes for swap space
# SYNOPSIS
# .ad
# .fi
#	\fBswaphash\fR [\fB-b \fIblock_size\fR] [\fB-d \fIdirectory\fR]
#		[\fB-h \fIhash_size\fR] [\fB-i \fIident_string\fR]
#		[\fB-p \fIcommand\fR] [\fB-t \fItimeout\fR] [\fIswapfile\fR ...]
# DESCRIPTION
#	The \fBswaphash\fR command computes MD5 hashes for successive blocks
#	of swap space. By default, this command examines all currently active
#	swap files.
#
#	The result for each swap file is named after the file
#	(\fBswaphash.\fIswapfile\fR), with \fB/\fR replaced by underscore.
#	However, by default all results are sent to the standard output
#	stream and nothing is stored.
#
#	See the OUTPUT FORMAT section below for output format details.
# .PP
#	Arguments:
# .IP "\fB-b \fIblock_size\fR"
#	The number of bytes over which an MD5 checksum is computed.
#	The default value is set with the \fB$hash_block_size\fR parameter
#	in the \fBcoroner.cf\fR file.
# .IP "\fB-d \fIdirectory\fR"
#	The directory for storing the result as one file per swapfile.
# .IP "\fB-h \fIhash_size\fR"
#	The number of bits of each MD5 checksum that are written to output.
#	This must be a multiple of 8.   The default value is set with the
#	\fB$hash_sum_size\fR parameter in the \fBcoroner.cf\fR file.
# .IP "\fB-i \fIident_string\fR"
#	Identification string. See the \fB-p\fR option. The default value
#	is UNIX time in seconds.
# .IP "\fB-p \fIcommand\fR"
#	Pipe the result for each \fIswapfile\fR into a separate instance
#	of the specified \fIcommand\fR. The data format consists of one line:
# .sp
# .ti +4
#	\fB#! \fIident_string\fR \fIfilename\fR
# .sp
#	with the (relative) result file name, followed by the actual data.
# .IP "\fB-t \fItimeout\fR"
#	Per-swapfile time limit. The default value is specified in the
#	\fBcoroner.cf\fR file with the \fB$timeouts{$BLOCKSUM}\fR parameter.
# .IP \fIswapfile\fR
#	Zero or more swapfiles. If no \fIswapfile\fR is specified, all
#	currently active swap areas are examined.
# OUTPUT FORMAT
# .ad
# .fi
#	The output format is in time machine format, as described in
#	tm-format(5).
#
#	For every \fIswapfile\fR, the output begins with a two-line header that
#	describes the data origin, swapfile name and hash block size.
#
#	The information is followed by a one-line header that lists the names
#	of the data attributes that make up the remainder of the output:
# .IP \fBhash\fR
#	The MD5 checksum.
# BUGS
#	With the following systems the \fIswapfile\fR needs to be specified
#	on the command line:
# .IP "SunOS 4.x"
#	This system has no system call to look up the swap device name.
# .IP "Linux RedHat <= 6.0"
#	This system has no working "swapon -s" command.
# SEE ALSO
#	tm-format(5), time machine file format.
#	blocksum(1), per-block MD5 checksums
# LICENSE
#	This software is distributed under the IBM Public License.
# AUTHOR(S)
#	Wietse Venema
#	IBM T.J. Watson Research
#	P.O. Box 704
#	Yorktown Heights, NY 10598, USA
#--

#
# Initialize. Allow TCT_HOME to be specified via the environment.
# This requires some ugly code layout so that the reconfig utility
# will do the right thing. Turn off scaffolding of library routines.
#
# NOTE: the odd line breaks in the if/else below are deliberate (see the
# remark about the reconfig utility above). Do not reformat this statement.
#
if ($ENV{'TCT_HOME'}) { $TCT_HOME = $ENV{'TCT_HOME'}; } else {
$TCT_HOME = "";
}

#
# Global flags. $verbose is replaced later on by the value of the -v
# command-line option.
# NOTE(review): $debug and $running_under_grave_robber are not referenced
# by the code visible in this file; presumably the required library
# routines consult them -- confirm against those libraries.
#
$debug = 0;
$verbose = 0;
$running_under_grave_robber = 1;

#
# Read configuration info and load support routines.
#
require "$TCT_HOME/conf/coroner.cf";
require "logger.pl";
require "tm_misc.pl";
require "paths.pl";
require "getopts.pl";
require "hostname.pl";
require "ostype.pl";
require "major_minor.pl";

#
# Initialize the logging before running any of the code loaded above.
# NOTE(review): $logfile is not set in this file; presumably it comes
# from coroner.cf -- confirm.
#
&log_init_path($logfile);

#
# Parse JCL. Take defaults from the coroner.cf file, if specified.
#
# The $opt_X variables are pre-loaded with their default values before
# the usage text is built, so that the usage text can display them.
# Getopts() then replaces the value of every option that was actually
# given on the command line.
#
chop($hostname = &hostname());
$opt_b = $hash_block_size;
$opt_d = undef;
$opt_h = $hash_sum_size;
$opt_i = time();
$opt_p = undef;
$opt_t = $timeouts{$BLOCKSUM};

$usage = "Usage: $0 [options] [swapfile ...]\
    -b block_size: block size for hashing (default: $opt_b)\
    -d directory: storage for result files\
    -h hash_size: bits per hash (default: $opt_h)\
    -i ident_string: for use with -p (default: $opt_i)\
    -p command: postprocessor\
    -t timeout: per-swapfile time limit (default: $opt_t)\n";

&Getopts("b:d:h:i:p:t:v") || die $usage;

#
# Sanity checks. The hash size is a number of bits and is documented as
# having to be a multiple of 8 (a whole number of bytes), so reject
# anything else up front instead of passing it on to the hashing command.
#
die "swaphash: bad block size: $opt_b" unless $opt_b > 0;
die "swaphash: bad hash size: $opt_h" unless $opt_h > 0;
die "swaphash: hash size must be a multiple of 8: $opt_h"
    unless $opt_h % 8 == 0;
$verbose = $opt_v;

# Publish the (possibly overridden) time limit under the same key that
# the default was read from.
$timeouts{$BLOCKSUM} = $opt_t;

#
# Find out the pathnames of the swap files. This is system dependent.
#
# Swap files named on the command line take precedence. Otherwise the
# list is built by running a system-specific command and collecting the
# device names from its output. The result is left in @pathnames; the
# program terminates when no swap file can be found.
#
&determine_os();
@pathnames = ();

if (@ARGV > 0) {
    @pathnames = @ARGV;
} elsif ($OS =~ /FREEBSD[2-4]/) {
    # "pstat -s": every line that starts with an absolute pathname.
    die "swaphash: No pstat command configured!\n" unless $PSTAT && -x $PSTAT;
    open(PSTAT, "$PSTAT -s|") || die "swaphash: cannot run $PSTAT: $!\n";
    while (<PSTAT>) {
	next unless(/^(\/\S+)/);
	push(@pathnames, $1);
    }
    close(PSTAT);
} elsif ($OS eq "LINUX2") {
    # "swapon -s": every line that starts with an absolute pathname.
    die "swaphash: No swapon command configured!\n" unless $SWAPON && -x $SWAPON;
    open(SWAPON, "$SWAPON -s|") || die "swaphash: cannot run $SWAPON: $!\n";
    while (<SWAPON>) {
	next unless(/^(\/\S+)/);
	push(@pathnames, $1);
    }
    close(SWAPON);
} elsif ($OS eq "SUNOS5") {
    # "swap -l": every line that starts with an absolute pathname.
    die "swaphash: No swap command configured!\n" unless $SWAP && -x $SWAP;
    open(SWAP, "$SWAP -l|") || die "swaphash: cannot run $SWAP: $!\n";
    while (<SWAP>) {
	next unless(/^(\/\S+)/);
	push(@pathnames, $1);
    }
    close(SWAP);
} elsif ($OS =~ /BSDI[2-4]/) {
    # "pstat -s": lines with exactly three fields; the first field is
    # taken as a device name relative to /dev.
    die "swaphash: No pstat command configured!\n" unless $PSTAT && -x $PSTAT;
    open(PSTAT, "$PSTAT -s|") || die "swaphash: cannot run $PSTAT: $!\n";
    while (<PSTAT>) {
	next unless(/^(\S+)\s+\S+\s+\S+$/);
	push(@pathnames, "/dev/$1");
    }
    close(PSTAT);
} elsif ($OS eq "OPENBSD2") {

    # Find all swap device major/minor numbers. Unfortunately, swapctl(2)
    # produces useless pathnames.

    die "swaphash: No pstat command configured!\n" unless $PSTAT && -x $PSTAT;
    open(PSTAT, "$PSTAT -ns|") || die "swaphash: cannot run $PSTAT: $!\n";
    while (<PSTAT>) {
	if (/(\d+,\d+)/) {
	    $device_hash{$1} = 1;
	    print "add swap device major,minor: $1\n" if $verbose;
	}
    }
    close(PSTAT);

    # Map device major/minor numbers to pathnames. This should be a
    # standard routine.

    opendir(DIR, "/dev")
	|| die "swaphash: Cannot open /dev: $!\n";
    while ($file = readdir(DIR)) {
	$device = "/dev/$file";
	next unless (-b $device || -c $device);
	# stat(_) re-uses the result of the file test above.
	$st_rdev = (stat(_))[6];
	$major_minor = &dev_major($st_rdev) . "," . &dev_minor($st_rdev);
	print "device: $device; major_minor: $major_minor\n" if $verbose;
	if (defined($device_hash{$major_minor})) {
	    print "add swap device pathname: $device\n" if $verbose;
	    push(@pathnames, $device);
	    delete($device_hash{$major_minor});
	    # Stop scanning /dev once every swap device has been resolved.
	    # This must test the %device_hash hash; $#device_hash would
	    # refer to an unrelated (and always empty) array, and would end
	    # the scan after the very first match.
	    last unless (keys(%device_hash) > 0);
	}
    }
    closedir(DIR);

    # Do not allow unknown swap devices.

    for (keys %device_hash) {
	die "swaphash: Cannot find the device for major/minor $_\n";
    }
} else {
    die "swaphash: Don't know how to get swap file pathnames for $OS\n";
}
die "swaphash: No swap file information found!\n" unless (@pathnames);

#
# Iterate over all swap files. Emit the time machine header info
# and print the hashes for each block.
#
# For every swap file the output goes to exactly one destination:
#   -p command:   a fresh instance of the command, preceded by a
#                 "#! ident_string filename" line;
#   -d directory: the file "directory/swaphash.<name>";
#   otherwise:    the standard output stream.
# <name> is the swap file pathname with each "/" replaced by "_".
#
for $swapfile (@pathnames) {
    print "Examining: $swapfile\n"
	if $verbose;
    ($pretty_swap = $swapfile) =~ tr/\//_/;
    if ($opt_p) {
	$output_file = "swaphash.$pretty_swap";
	&log_item("swaphash: piping output into command \"$opt_p\"");
	open(OUTPUT, "|$opt_p")
	    || die "swaphash: Cannot execute command \"$opt_p\": $!\n";
	print OUTPUT "#! $opt_i $output_file\n";
	$output = OUTPUT;
    } elsif ($opt_d) {
	$output_file = "$opt_d/swaphash.$pretty_swap";
	open(OUTPUT, ">$output_file")
	    || die "swaphash: Cannot create $output_file: $!\n";
	# Direct the results into the file that was just created.
	$output = OUTPUT;
    } else {
	$output = STDOUT;
    }

    # Header: attribute names and values that describe the data origin,
    # followed by the attribute name for the per-block data lines.
    &tm_fprint($output, "class", "host", "start_time", "file", "blocksize");
    &tm_fprint($output, "swaphash", $hostname, time(), $swapfile, $opt_b);
    &tm_fprint($output, "hash");

    # Run the hashing command and copy its output lines verbatim.
    # NOTE(review): pipe_command() is a library routine; the time limit
    # in $timeouts{$BLOCKSUM} is presumably enforced there -- confirm.
    &pipe_command(BLOCKSUM, "$BLOCKSUM", "-b$opt_b", "-h$opt_h", $swapfile, "-|")
	|| die "swaphash: Cannot execute $BLOCKSUM: $!\n";
    while (<BLOCKSUM>) {
	print $output $_;
    }
    close(BLOCKSUM);
    # XXX This error test does not work!!
    warn "swaphash: $BLOCKSUM terminated with status $?\n" if $?;

    # Only close handles that were opened above; never close STDOUT.
    if ($opt_p || $opt_d) {
	close(OUTPUT) || warn "swaphash: output write error: $!\n";
    }
}
exit 0;
