/*++
/* NAME
/*	blocksum 1
/* SUMMARY
/*	generic block-oriented checksummer
/* SYNOPSIS
/* .ad
/* .fi
/*	\fBblocksum\fR [\fB-v\fR] [\fB-b \fIblock_size\fR]
/*		[\fB-h \fIhash_size\fR] [\fB-l \fIlength\fR]
/*		[\fIfile\fR ...]
/* DESCRIPTION
/*	\fBblocksum\fR reads the named \fIfile(s)\fR and outputs
/*	one line with an ASCII encoded hexadecimal hash for each block,
/*	including any partial last block at the end of each file.
/*	The hash function is based on the MD5 algorithm.
/* .PP
/*	 Arguments:
/* .IP "\fB-b \fIblock_size\fR"
/*	The number of bytes over which an MD5 hash is computed.
/*	The default is 1024 bytes.
/* .IP "\fB-h \fIhash_size\fR"
/*	The number of output bits per hash. The number must be a multiple
/*	of 8. The default number of bits per hash is 16 bits.
/*	The maximal number of bits is 128 (i.e. the full output
/*	from the MD5 algorithm).
/* .IP "\fB-l \fIlength\fR"
/*	Read at most \fIlength\fR bytes from each \fIfile\fR.
/* .IP \fB-v\fR
/*	Enable verbose mode, output to stderr.
/* .IP "\fIfile ...\fR"
/*	Zero or more files to read from. By default the program reads
/*	from standard input. The output from each file begins on a block
/*	boundary, even when the preceding file ended with a partial block.
/* BUGS
/*	Normally, a program like \fBblocksum\fR would mmap() its input
/*	files in order to avoid memory to memory copies. However, this
/*	would be the wrong approach with TCT, where one wants to capture
/*	information from swap space and from physical memory without
/*	disturbing it unnecessarily.
/* LICENSE
/*	This software is distributed under the IBM Public License.
/* AUTHOR(S)
/*	Wietse Venema
/*	IBM T.J. Watson Research
/*	P.O. Box 704
/*	Yorktown Heights, NY 10598, USA
/*
/*	The MD5 implementation used by this program is placed in the
/*	public domain for free general use by RSA Data Security.
/*--*/

/* System library. */

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Platform glue. STRTOUL names the routine used to parse the unsigned long
 * option values (-b and -l).
 */
#if defined(SUNOS4)
/*
 * Declared by hand for SUNOS4 builds; presumably the system headers there
 * do not provide these (confirm before removing).
 */
extern char *optarg;
extern int optind;
extern int getopt();

/* SUNOS4 builds parse with strtol() instead of strtoul(). */
#define STRTOUL strtol
#endif

#if defined(FREEBSD2) || defined(FREEBSD3) || defined(FREEBSD4) \
	|| defined(BSDI2) || defined(BSDI3) || defined(BSDI4) \
	|| defined(OPENBSD2) \
	|| defined(SUNOS5) \
	|| defined(LINUX2)
#define STRTOUL	strtoul
#endif

/*
 * Any platform not listed above: fall back to the standard strtoul() so
 * that STRTOUL is always defined.
 */
#ifndef STRTOUL
#define STRTOUL	strtoul
#endif

/* TCT library. */

#include <error.h>
#include <mymalloc.h>

/* Application-specific. */

#include <global.h>
#include <md5.h>

#define DEFAULT_BLOCK_SIZE	1024	/* bytes hashed per output line unless -b is given */
#define DEFAULT_HASH_SIZE	16	/* bits printed per hash unless -h is given */
#define MD5_HASH_LENGTH		16	/* bytes in the raw MD5 digest buffer (128 bits) */

/*
 * blocksum - sum blocks of various sizes
 *
 * Reads at most read_length bytes from fp, in chunks of at most block_size
 * bytes. For every chunk read (including a short final one) the first
 * hash_size / 8 bytes of the chunk's MD5 digest are written to stdout as
 * lowercase hexadecimal, followed by a newline.
 *
 * buf must have room for at least block_size bytes. hash_size is in bits;
 * hash_size / 8 bytes of sum[] are read, so the caller must not pass more
 * than 8 * MD5_HASH_LENGTH.
 *
 * A zero return from fread() ends the loop, so a read error is handled the
 * same way as end of input.
 */
static void blocksum(FILE *fp, char *buf, unsigned long block_size,
		             int hash_size, unsigned long read_length)
{
    MD5_CTX md;
    unsigned char sum[MD5_HASH_LENGTH];
    static const char hex[] = "0123456789abcdef";
    int     hash_bytes = hash_size / 8;
    unsigned long left;
    size_t  todo;
    size_t  count;
    int     i;

    /*
     * Iterate over the input, including any partial last block. Treat the
     * read_length value as an upper bound. The actual file may be smaller.
     * 
     * The byte counts are kept in size_t: with int counters a block size
     * above INT_MAX was truncated before being handed to fread(), which
     * could then be asked to store more than buf can hold.
     */
    for (left = read_length; left > 0; left -= count) {
	todo = (left > block_size ? block_size : left);
	if ((count = fread(buf, 1, todo, fp)) == 0)
	    break;
	MD5Init(&md);
	/*
	 * NOTE(review): the type of MD5Update()'s length parameter comes
	 * from md5.h; confirm it can carry counts above UINT_MAX if such
	 * block sizes are to be supported.
	 */
	MD5Update(&md, buf, count);
	MD5Final(sum, &md);
	for (i = 0; i < hash_bytes; i++) {
	    putchar(hex[(sum[i] >> 4) & 0xf]);
	    putchar(hex[sum[i] & 0xf]);
	}
	putchar('\n');
    }
}

/* usage - explain and terminate */

static void usage(const char *text)
{
    if (text)
	remark("%s", text);
    error("usage: %s [-b (block size)] [-h (hash size)] [-l read_length] [-v] ] [file... ]",
	  progname);
}

/*
 * main - parse the command line, then checksum standard input or each of
 * the named files in turn.
 */
int     main(int argc, char **argv)
{
    char   *buf;
    unsigned long block_size = DEFAULT_BLOCK_SIZE;
    int     hash_size = DEFAULT_HASH_SIZE;
    unsigned long read_length = ~0UL;	/* no limit unless -l is given */
    long    hash_bits;
    int     ch;
    FILE   *fp;
    char   *cp;

    progname = argv[0];

    /*
     * Parse command-line options.
     */
    while ((ch = getopt(argc, argv, "b:h:l:v")) > 0) {
	switch (ch) {
	case 'b':
	    block_size = STRTOUL(optarg, &cp, 0);
	    if (*cp || cp == optarg || block_size <= 0)
		usage("invalid -b option value");
	    break;
	case 'h':

	    /*
	     * Parse with strtol() rather than atoi() so that trailing junk
	     * ("16x") and empty values are rejected, as for -b and -l. Base
	     * 10 keeps the decimal interpretation atoi() used.
	     */
	    hash_bits = strtol(optarg, &cp, 10);
	    if (*cp || cp == optarg || hash_bits <= 0)
		usage("invalid -h option value");
	    if (hash_bits % 8)
		usage("-h option requires multiple of 8");
	    if (hash_bits > 128)
		usage("-h option value must not exceed 128");
	    hash_size = (int) hash_bits;
	    break;
	case 'l':
	    read_length = STRTOUL(optarg, &cp, 0);
	    if (*cp || cp == optarg || read_length <= 0)
		usage("invalid -l option value");
	    break;
	case 'v':
	    verbose++;
	    break;
	default:

	    /*
	     * Kept last with its own break so that an unknown option can
	     * never fall through into an option handler.
	     */
	    usage((char *) 0);
	    break;
	}
    }

    /*
     * One-off initializations. XXX Normally one would use mmap() to avoid
     * memory to memory copies, but mmap()ing the swap space or physical
     * memory seems weird.
     */
    buf = mymalloc(block_size);

    /*
     * Process the named inputs. If more than one file is specified, read
     * each file up to the specified limit.
     */
    if (optind == argc) {
	blocksum(stdin, buf, block_size, hash_size, read_length);
    } else {
	for ( /* void */ ; optind < argc; optind++) {
	    if ((fp = fopen(argv[optind], "r")) == 0)
		error("open %s: %m", argv[optind]);
	    blocksum(fp, buf, block_size, hash_size, read_length);
	    fclose(fp);
	}
    }

    /*
     * The hashes are the whole point of the program: do not report success
     * when they could not be written.
     */
    if (fflush(stdout) != 0 || ferror(stdout))
	error("write standard output: %m");
    return (0);
}
