/*
 * munge.c -- Program to convert a text file into "munged" form,
 *            suitable for reconstruction from printed form.  Tabs are
 *            made visible and checksums are added to each line and each
 *            page to protect against transcription errors.
 *
 * Copyright (C) 1997 Pretty Good Privacy, Inc.
 *
 * Designed by Colin Plumb, Mark H. Weaver, and Philip R. Zimmermann
 * Written by Mark H. Weaver
 *
 * $Id: munge.c,v 1.18 1997/07/09 15:07:49 colin Exp $
 */

#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "crc.h"

/*
 * The file is divided into pages, and the format of each page is
 *
--f414 000b2dc79af40010002 Page 1 of munge.c

 bc38e5 /*
 40a838  * munge.c -- Program to convert a text file into munged form
 647222  *
 193f28  * Copyright (C) 1997 Pretty Good Privacy, Inc.
 827222  *
 699025  * Designed by Colin Plumb, Mark H. Weaver, and Philip R. Zimmermann
 0d050c  * Written by Mark H. Weaver
 *
 * Where the first 2 columns are the high 8 bits (in hex) of a running
 * CRC-32 of the page (the string "--", unlikely to be confused with
 * any digits, indicates a page header line) and the next 4 columns
 * are a CRC-16 of the rest of the line.  Then a space (not counted in
 * the CRC), and the line of text.  Tabs are printed as the currency
 * symbol (ISO Latin 1 character 164) followed by the appropriate number
 * of spaces, and any form feeds are printed as a yen symbol (Latin 1 165).
 * The CRC is computed on the transformed line, including the trailing
 * newline.  No trailing whitespace is permitted.
 *
 * The header line contains a (hex) number of the form 0ffcccccccctpppnnnn,
 * where the digit 0 is a version number, ff are flags, cccccccc is the CRC-32
 * of the page, t is the tab size (usually 4 or 8; 0 for binary files that
 * are sent in radix-64), ppp is the product number (usually 1, different
 * for different books), and nnnn is the file number (sequential from 1).
 *
 * This is followed by " Page %u of " and the file name.
 */

/*
 * All of the state carried while munging a single input file.
 * One instance is allocated per call to MungeFile().
 */
typedef struct MungeState
{
	/*
	 * binaryMode: nonzero when the input is read as raw bytes and
	 * radix-64 encoded instead of being read line by line.
	 * tabWidth: tab stop spacing used when expanding tabs; set to 0
	 * in binary mode.
	 */
	int				binaryMode, tabWidth;
	/* Count of read iterations so far; reported as "line" in errors */
	long			origLineNumber;
	/*
	 * productNumber, fileNumber: values written into each page header.
	 * pageNumber: pages emitted so far (the header prints pageNumber + 1).
	 * lineNumber: lines accumulated for the current page.
	 */
	long			productNumber, fileNumber, pageNumber, lineNumber;
	/* Input bytes consumed in binary mode; reported as "offset" in errors */
	unsigned long	fileOffset;
	/* CRC-32 accumulated over the current page; reset at each page break */
	word32			runningCRC;
	/* Input file name exactly as supplied by the caller */
	char const *	fileName;
	/* Part of fileName following the last '/', or all of it if none */
	char const *	fileNameTail;
	char *			pageBuffer;	/* Buffer large enough to hold one page */
	char *			pagePos;	/* Current position in pageBuffer */
	/* Flags field of the page header (HDR_FLAG_LASTPAGE on the final page) */
	word16			hdrFlags;
	/* Input stream being munged */
	FILE *			file;
	/* Stream that receives the munged pages */
	FILE *			out;
} MungeState;

/*
 * Build the checksum prefix for one munged line.
 *
 * line/length  - the text to checksum.
 * prefix       - receives the formatted prefix: the top byte of the
 *                running CRC-32, then the CRC-16 of this line, then a
 *                single space in place of sprintf's terminating null.
 * runningCRC   - if non-NULL, the page CRC-32 to fold this line into;
 *                it is updated in place.  If NULL, the running-CRC part
 *                of the prefix is written as zero.
 */
void ChecksumLine(char const *line, size_t length,
			   char *prefix, word32 *runningCRC)
{
	byte const *	bytes = (byte const *)line;
	byte			pageCRCTop = 0;
	word16			crc16;

	crc16 = CalculateCRC16(0, bytes, length);

	if (runningCRC != NULL)
	{
		word32	updated = CalculateCRC32(*runningCRC, bytes, length);

		*runningCRC = updated;
		pageCRCTop = (byte)(updated >> 24);
	}

	sprintf(prefix, (FMT8 FMT16), pageCRCTop, crc16);
	/* The text of the line follows directly, so replace the null */
	prefix[6] = ' ';
}

/*
 * Report an error on stderr together with the current position in the
 * input: a byte offset in binary mode, otherwise a line number.
 * Always returns 1 so callers can write "return PrintFileError(...)".
 */
int PrintFileError(MungeState *state, char const *message)
{
	char const *	unit;
	unsigned long	where;

	if (state->binaryMode)
	{
		unit = "offset";
		where = state->fileOffset;
	}
	else
	{
		unit = "line";
		where = state->origLineNumber;
	}

	fprintf(stderr, "%s in %s %s %lu\n", message, state->fileName,
			unit, where);
	return 1;
}

/*
 * Convert one input line into its munged form.
 *
 * state       - supplies the tab width and is used for error reporting.
 * buffer      - the raw input characters; not modified.
 * length      - number of characters in buffer.  If they do not end in
 *               a newline, the end of the buffer is treated as the end
 *               of the line.
 * line        - receives the munged text, terminated by "\n\0".
 * bufferUsed  - receives the number of characters consumed from buffer.
 *               This is less than length when conversion stopped early
 *               (after a form feed, or once the output exceeded
 *               LINE_LENGTH); the caller re-reads the remainder.
 *
 * Returns 0 on success, or nonzero after printing a message on stderr.
 */
int MungeLine(MungeState *state, char *buffer, int length,
			  char *line, int *bufferUsed)
{
	int		i, j;
	char	ch;

	/*
	 * No newline is appended to buffer when one is missing: a buffer
	 * completely filled by fgets has no spare room for it, and running
	 * off the end of the loop below has the same effect.
	 */
	j = 0;
	for (i = 0; i < length && j <= LINE_LENGTH; i++)
	{
		ch = buffer[i];
		if (ch == '\t')
		{
			if (state->tabWidth < 1)
				return PrintFileError(state,
									  "ERROR: Tab found in radix64 stream");
			/* Visible tab marker, then padding up to the next tab stop */
			line[j++] = TAB_CHAR;
			while (j % state->tabWidth && j <= LINE_LENGTH)
				line[j++] = TAB_PAD_CHAR;
		}
		else if (ch == '\n')
		{
			if (++i < length)
				return PrintFileError(state,
								"UNEXPECTED ERROR: fgets read past newline!?");
			break;
		}
		else if (ch == '\f')
		{
			/* A form feed ends the output line; the rest is re-read */
			i++;
			line[j++] = FORMFEED_CHAR;
			break;
		}
		else if (ch >= ' ' && ch <= '~')
			line[j++] = ch;
		else
			return PrintFileError(state, "ERROR: Non-ASCII char");
	}

	/* Strip trailing spaces */
	while (j > 0 && isspace((unsigned char)line[j - 1]))
		j--;

	if (j > LINE_LENGTH)
		return PrintFileError(state, "ERROR: Line too long");

	/* Add trailing newline and NULL */
	line[j++] = '\n';
	line[j++] = '\0';

	/* Return number of chars used from buffer */
	*bufferUsed = i;

	return 0;
}

/*
 * Encode the three bytes at src as four radix-64 digits in dest,
 * taking six bits at a time starting from the high bits of src[0].
 */
static void
Encode3(byte const src[3], char dest[4])
{
	unsigned const	first  = src[0];
	unsigned const	second = src[1];
	unsigned const	third  = src[2];

	dest[0] = radix64Digits[(first >> 2) & 0x3f];
	dest[1] = radix64Digits[((first << 4) & 0x30) | ((second >> 4) & 0x0f)];
	dest[2] = radix64Digits[((second << 2) & 0x3c) | ((third >> 6) & 0x03)];
	dest[3] = radix64Digits[third & 0x3f];
}

/*
 * Radix-64 encode srcLen bytes from src into dest.
 *
 * Every three input bytes become four output characters.  A final
 * group of one or two bytes is padded with zero bytes before encoding,
 * and the output digits that carry no input data are replaced by '='.
 *
 * dest is not null-terminated.  Returns the number of characters
 * written to dest.
 */
static int
EncodeLine(byte const *src, int srcLen, char *dest)
{
	char *	destp = dest;
	byte	tempSrc[3];
	int		pad;

	for (; srcLen >= 3; srcLen -= 3)
	{
		Encode3(src, destp);
		src += 3; destp += 4;
	}

	if (srcLen > 0)
	{
		memset(tempSrc, 0, sizeof(tempSrc));
		memcpy(tempSrc, src, srcLen);
		/*
		 * Encode the zero-padded copy, not src itself: the bytes after
		 * the last valid input byte are not part of the data.
		 */
		Encode3(tempSrc, destp);
		destp += 4;
		/* One missing byte gives one '=', two missing bytes give two */
		for (pad = srcLen - 3; pad < 0; pad++)
			destp[pad] = '=';
	}

	return (int)(destp - dest);
}

/*
 * Munge one chunk of binary input: radix-64 encode the length bytes at
 * buffer, terminate the result with a newline, and pass it through
 * MungeLine() into line.  Returns whatever MungeLine() returns.
 */
static int
MungeBinaryLine(MungeState *state, byte const *buffer, int length, char *line)
{
	char	encoded[128];
	int		consumed;			/* Required by MungeLine; not needed here */
	int		encodedLen = EncodeLine(buffer, length, encoded);

	/* Finish the encoded text the way a line read from a file would end */
	encoded[encodedLen] = '\n';
	encoded[encodedLen + 1] = '\0';

	return MungeLine(state, encoded, encodedLen + 1, line, &consumed);
}

/*
 * If the current page is full (lineNumber has reached LINES_PER_PAGE),
 * write the page header followed by the buffered page text to
 * state->out and reset the per-page state.  Otherwise do nothing.
 *
 * Returns 0 on success (including when no page break was needed), or
 * nonzero after printing a message on stderr if the header line would
 * be too long.
 */
int MaybePageBreak(MungeState *state)
{
	/*
	 * Generous upper bound on the header text other than the file name
	 * (numeric fields at full width for longs of up to 64 bits, plus
	 * the literal text).  Used to keep sprintf inside line[].
	 */
	enum { HEADER_FIXED_MAX = 128 };

	if (state->lineNumber >= LINES_PER_PAGE)
	{
		char	line[512];
		char *	lineData = line + PREFIX_LENGTH;
		size_t	headerLength;

		/* Reject names that could overflow line[] before formatting */
		if (strlen(state->fileNameTail) >
				sizeof(line) - PREFIX_LENGTH - HEADER_FIXED_MAX)
		{
			PrintFileError(state, "ERROR: Header line too long");
			fprintf(stderr, "> %s\n", state->fileNameTail);
			return 1;
		}

		/* Casts make each argument match its conversion specifier */
		sprintf(lineData, "%01x%02x%08lx%01x%03lx%04lx Page %ld of %s\n",
				0u,										/* 1: Format version 0 */
				(unsigned)state->hdrFlags,				/* 2: Flags */
				(unsigned long)state->runningCRC,		/* 8: Running CRC32 */
				(unsigned)state->tabWidth,				/* 1: Tab width (0 means radix64) */
				(unsigned long)state->productNumber,	/* 3: Product number (0 - 4095) */
				(unsigned long)state->fileNumber,		/* 4: File number (0 - 65535) */
				state->pageNumber + 1,
				state->fileNameTail);

		headerLength = strlen(lineData);
		if (headerLength > LINE_LENGTH + 1)
		{
			PrintFileError(state, "ERROR: Header line too long");
			fprintf(stderr, "> %s", lineData);
			/* Positive: the message has already been printed */
			return 1;
		}

		/* Compute checksums and prefix them to line */
		ChecksumLine(lineData, headerLength, line, NULL);

		/* "--" replaces the running-CRC digits to mark a header line */
		fprintf(state->out, "--%s\n%s", line + 2, state->pageBuffer);

		state->pageNumber++;
		state->lineNumber = 0;
		state->runningCRC = 0;
		state->pagePos = state->pageBuffer;		/* Clear page buffer */
		state->pageBuffer[0] = '\0';
	}
	return 0;
}

/*
 * Search for Emacs "tab-width: " marker in file.
 * Emacs is stricter about the format, but this will do.
 *
 * Only the last sizeof(buffer) - 1 bytes of state->file are examined
 * (the whole file if it is shorter than that).  The file position is
 * left wherever the read ended.
 *
 * Returns the tab width found, or 0 if there is no marker, the number
 * is not followed by the end of the line, or the value is less than 2.
 * A value above 16 is returned but draws a warning on stderr.
 */
int FindTabWidth(MungeState *state)
{
	char const * const	tabWidthMarker = " tab-width: ";
	char				buffer[512];
	char *				p;
	size_t				length;
	int					tabWidth = 0;

	/*
	 * sizeof yields an unsigned value, so convert to long before
	 * negating.  The seek fails when the file is shorter than the
	 * window; in that case scan from the beginning instead.
	 */
	if (fseek(state->file, -(long)(sizeof(buffer) - 1), SEEK_END) != 0)
		rewind(state->file);
	length = fread(buffer, 1, sizeof(buffer) - 1, state->file);
	buffer[length] = '\0';

	p = strstr(buffer, tabWidthMarker);
	if (p != NULL)
	{
		p += strlen(tabWidthMarker);
		/* Skip blanks, but never move past the end of the line */
		while (*p != '\0' && *p != '\n' && isspace((unsigned char)*p))
			p++;
		tabWidth = (int)strtol(p, &p, 10);
		while (*p != '\0' && *p != '\n' && isspace((unsigned char)*p))
			p++;
		if (*p != '\n' || tabWidth < 2)
			tabWidth = 0;
		else if (tabWidth > 16)
			fprintf(stderr, "WARNING: Weird tab-width (%d), %s\n",
							tabWidth, state->fileName);
	}
	return tabWidth;
}

/*
 * Open the given source file and send the munged output to the
 * FILE *, with the given options.
 */
int MungeFile(char const *fileName, FILE *out, int binaryMode,
			  int defaultTabWidth, long productNumber, long fileNumber)
{
	MungeState *	state;
	int				length, used;
	char			line[PREFIX_LENGTH + LINE_LENGTH + 10];
	char *			lineData = line + PREFIX_LENGTH;
	char			buffer[128];
	int				result = 0;

	state = (MungeState *)calloc(1, sizeof(*state));
	state->origLineNumber = 0;
	state->fileName = fileName;
	state->runningCRC = 0;
	state->productNumber = productNumber;
	state->fileNumber = fileNumber;
	state->pageNumber = 0;
	state->lineNumber = 0;
	state->fileOffset = 0;
	state->binaryMode = binaryMode;
	state->pageBuffer = malloc(PAGE_BUFFER_SIZE);
	state->pageBuffer[0] = '\0';
	state->pagePos = state->pageBuffer;
	state->hdrFlags = 0;
	state->out = out;

	state->fileNameTail = strrchr(state->fileName, '/');
	if (state->fileNameTail == NULL)
		state->fileNameTail = state->fileName;
	else
		state->fileNameTail++;

	state->file = fopen(state->fileName, binaryMode ? "rb" : "r");
	if (state->file == NULL)
	{
		result = errno;
		goto error;
	}
	
	if (state->binaryMode)
	{
		state->tabWidth = 0;
	}
	else
	{
		state->tabWidth = FindTabWidth(state);
		if (state->tabWidth == 0)
			state->tabWidth = defaultTabWidth;
		rewind(state->file);
	}

	while (!feof(state->file))
	{
		state->origLineNumber++;

		if (state->binaryMode)
		{
			length = fread(buffer, 1, BYTES_PER_LINE, state->file);
			if (length < 1)
			{
				if (feof(state->file))
					break;
				goto fileError;
			}
			if ((result = MaybePageBreak(state)))
				goto error;
			if ((result = MungeBinaryLine(state, buffer, length, lineData)))
				goto error;
			state->fileOffset += length;
		}
		else
		{
			if (fgets(buffer, sizeof(buffer), state->file) == NULL)
			{
				if (feof(state->file))
					break;
				goto fileError;
			}
			length = strlen(buffer);
			if ((result = MaybePageBreak(state)))
				goto error;
			if ((result = MungeLine(state, buffer, length, lineData, &used)))
				goto error;

			if (used < length)
				if (fseek(state->file, used - length, SEEK_CUR))
					goto fileError;
		}

		/* Compute checksums and prefix them to the line */
		ChecksumLine(lineData, strlen(lineData), line, &state->runningCRC);

		strcpy(state->pagePos, line);
		length = strlen(state->pagePos);
		/* Suppress trailing whitespace on blank lines */
		if (length == PREFIX_LENGTH+1 && state->pagePos[length-1] == '\n') {
			state->pagePos[--length-1] = '\n';
			state->pagePos[length] = '\0';
		}
		state->pagePos += length;

		state->lineNumber++;
	}

	if (state->lineNumber > 0)
	{
		/* Force a final page break */
		state->lineNumber = LINES_PER_PAGE;
		state->hdrFlags |= HDR_FLAG_LASTPAGE;
		if ((result = MaybePageBreak(state)))
			goto error;
	}

	result = 0;
	goto done;

fileError:
	result = ferror(state->file);

error:
done:
	if (state != NULL)
	{
		if (state->file != NULL)
			fclose(state->file);
		free(state);
	}
	return result;
}

/*
 * Usage: munge [-<digit>] [-b] [--] file...
 *
 * Each leading argument that starts with '-' is a group of
 * single-character options: a digit sets the default tab width and
 * 'b' selects binary mode.  "--" ends option processing.  Every
 * remaining argument is munged to stdout in turn, with product number
 * 1 and file numbers counting up from 1.  Exits with status 1 on the
 * first error.
 */
int main(int argc, char *argv[])
{
	int		result = 0;
	int		i, j;
	int		defaultTabWidth = 4;
	int		binaryMode = 0;
	long	fileNumber = 0;

	InitCRC();

	for (i = 1; i < argc && argv[i][0] == '-'; i++)
	{
		if (0 == strcmp(argv[i], "--"))
		{
			i++;
			break;
		}
		for (j = 1; argv[i][j] != '\0'; j++)
		{
			/* <ctype.h> functions need an unsigned char value */
			if (isdigit((unsigned char)argv[i][j]))
			{
				defaultTabWidth = argv[i][j] - '0';
				if (defaultTabWidth < 2 || defaultTabWidth > 9)
					fprintf(stderr, "WARNING: Weird default tab-width (%d)\n",
									defaultTabWidth);
			}
			else if (argv[i][j] == 'b')
			{
				binaryMode = 1;
			}
			else
			{
				fprintf(stderr, "ERROR: Unrecognized option -%c\n", argv[i][j]);
				exit(1);
			}
		}
	}

	for (; i < argc; i++)
	{
		/* File numbers are sequential from 1, independent of options */
		fileNumber++;
		if ((result = MungeFile(argv[i], stdout, binaryMode,
								defaultTabWidth, 1, fileNumber)) != 0)
		{
			/* If result > 0, message should have already been printed */
			/* NOTE(review): strerror() of a negative value is not a real
			 * errno description -- confirm what negative results mean */
			if (result < 0)
				fprintf(stderr, "ERROR: %s\n", strerror(result));
			exit(1);
		}
	}

	return 0;
}

/*
 * Local Variables:
 * tab-width: 4
 * End:
 * vi: ts=4 sw=4
 * vim: si
 */

