/*
The contents of this file contain text and code describing and 
implementing the 'DES' encryption algorithm. Despite the fact 
that this information is freely available overseas, it remains 
a violation of ITAR and/or EAR to export this information 
from inside the US or Canada to outside the US or Canada, or 
to pass it to a non-US or non-Canadian citizen within the US 
or Canada. The US Government evidently defines 'Export' to 
include placing this information on a non-restricted FTP server 
or Web site. Please do not do so, and be sure that any person you
pass this on to is made aware of this restriction.
									Peter Trei
									ptrei@acm.org

 * THIS SOFTWARE IS PROVIDED BY PETER TREI ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.

This software is copyright (c) 1997 Peter Trei (ptrei@acm.org), except for
those portions written by Phil Karn, which retain their
original ownership.

This software may be redistributed freely for use in the RSA DES Challenge,
but please obey the restrictions imposed by the US Government, and make
sure that anyone you pass it to is also aware of them.

This software may not be used for commercial purposes without the written
permission of Peter Trei and the other owners.

Please redistribute only as a complete, unmodified package, including 
source code, and ptrei@acm.org's PGP signature file and key.

 */



#include <ctype.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef WIN32
#include <time.h>
#endif
#include <sys/timeb.h>

#include "deskr.h"

/* Single-bit masks indexed by bit position: bit8a[n] == (1 << n), n = 0..7. */
unsigned int bit8a[]={0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80};

/* File-scope scratch pointer. Not referenced by the code visible in this
   file (decrypt_challenge() shadows it with a local array of the same name). */
unsigned long *tmp;
/*
TESTGRAY is a compile-time switch which enables testing
of the use of Gray codes to step through the key schedule.
In the indicated code of main(), decomment either the use
of do_lower_16 or do_upper_16. It will run through the 
2^16 settings covered by that routine, setting the q[]
array to the number of times each one is hit. If the q[]
array is filled with ones, then the test was successful. 
*/



/* Start and end keys of a key range, in 'uncrunched' 8 byte form.
   NOTE(review): neither array is referenced by the code visible in this
   file - presumably left over from testing; confirm before removing. */
static unsigned char startkey[8]=				/* start point */
    {0x01,0x23,0x40,0x67,0x89,0xa8,0x00,0x00};
static unsigned char endkey[8]=					/* end point */
    {0x01,0x23,0x40,0x67,0x89,0xaf,0xff,0xff};


#if 0
 these are the originals, spanning the target key 
static unsigned char startkey[8]=				/* start point */
    {0x01,0x23,0x45,0x67,0x89,0xab,0xc8,0x00};
static unsigned char endkey[8]=					/* end point */
    {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xff};
#endif

/* Byte bit masks in octal, most significant bit first:
   bytebit[n] == (0x80 >> n), n = 0..7. */
static int bytebit[] = {
	0200,0100,040,020,010,04,02,01
};

/* Known test key in uncrunched form. */
unsigned char targkey2[8]=				/* key to find */
    {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
/* NOTE(review): xh, xl, elapsed, s and y are not referenced by the code
   visible in this file; presumably used by other translation units or
   left over from timing experiments - confirm. */
unsigned long xh , xl;

	long elapsed;
	double s;
	int y;
#ifdef WIN32
/* 	MSVC++ inline assembler macro for updating key schedule
    this macro assumes:
	pointer to key schedule in eax,
	pointer to key diff table in ecx
	esi, edi, ebx, edx are for working data 
	i is the number of bytes past the start of the arrays. 
	*/
/*
#define keyupdatemacro(i)\
	{\
	__asm mov edi, [eax + i]\
	__asm mov esi, [ebx + i + 4]\
	__asm xor edi, [ecx + i]\
	__asm xor esi, [edx + i + 4]\
	__asm mov [eax + i], edi\
	__asm mov [ebx + i + 4], esi\
	}
The above macro takes 5 clocks to run. The one below, 4.*/
/* keyupdatemacro(i): XOR the two 32-bit words at [ecx+i] and [ecx+i+4]
   into the words at [eax+i] and [eax+i+4], i.e. update one 8 byte subkey.
   Expects eax = key schedule base and ecx = key diff table base (both are
   loaded by the caller); uses ebx, esi, edx and edi as scratch. */
#define keyupdatemacro(i)\
	{\
	__asm mov ebx,		[eax+i]\
	__asm mov esi,		[eax+i+4]\
	__asm mov edx,		[ecx+i]\
	__asm mov edi,		[ecx+i+4]\
	__asm xor ebx,		edx\
	__asm xor esi,		edi\
	__asm mov [eax+i],	ebx\
	__asm mov [eax+i+4],esi\
	}
/* key0updatemacro(a,b,c,d): special case used when key bit 0 flips.
   XORs the constant b into the 32-bit word at [eax+a] and the constant d
   into the word at [eax+c]. Expects eax = key schedule base; uses ebx and
   ecx as scratch (so ecx does not hold the diff table pointer afterwards). */
#define key0updatemacro(a,b,c,d)\
	{\
	__asm  mov ebx, [eax+a]\
	__asm  mov ecx, [eax+c]\
	__asm  xor ebx, b\
	__asm  xor ecx, d\
	__asm  mov [eax+a], ebx\
	__asm  mov [eax+c], ecx\
	}
#endif /* WIN32 */

/*
There are several ways in which keys are referred to in this code, and several
forms in which they appear.

'uncrunched' describes keys stored as 64 bit values in 8 byte arrays of
unsigned char. This is the 'external' form of keys, for human consumption.

'crunched' keys are stored in two unsigned longs, refered to as 'high'
and 'low'. The low, or 'l' long contains the low order 32 bits. The 
high, or 'h' long contains the high order 24 bits. The bits get reordered
as we convert from uncrunched to crunched, as parity bits are dropped, and
the eight bits not used in round 0 are reshuffled to put them into the
least significant bits of the l long.

The 'h' long of the crunched key defines the 'chunk' of 32 bits of 
keyspace under test. There are 2^24 'chunks'.

The key under actual test is only a distant cousin of the values
in h and l. While h accurately describes 24 bits of the key under
test, the l long is handled quite differently.

The lower 32 bits are handled in a modified Gray code order. Gray codes
are a system of iterating binary values so that successive values 
differ in only one bit.

Part of the initial setup creates an array called gray8. This has 256
entries, each describing which bit changes from one entry to the next - thus,
gray8[10] specifies which bit changes between the 10th and 11th values
in Gray code order (remember, it's a 0-based array).

Four integers - b0, b1, b2, and b3 are maintained, each refering to one
byte in the low long - b0 refers to the lowest, most rapidly varying 
byte.

As we step through the keys in a chunk, we iterate through the 256 b0
values in Gray order, then step b1 to its next Gray value. Carrying
this on to b2 and b3, we eventually step through all 2^32 possible
l long values.

Thus, we step through the keys in a manner which (usually) varies by only
one bit. This means that we need only xor the subkey value of one key bit
to update the key schedule.

This bizarre mechanism significantly speeds up the key scheduling. When
we have a hit, there's a routine to regenerate an actual key from the 
key schedule. 

Keep in mind that our goal is to test all the keys without overlap - it's
not important to test them in order.

*/
/*********************/
/*
 * Decrypt the entire challenge, using the supplied key. This is used for
 * the final output.
 *
 * key: the candidate key in 'uncrunched' (8 byte) form.
 *
 * Reads full_ciphertext[0 .. ciphertext_len-1] one 8 byte block at a time,
 * runs the DES rounds with a schedule built from key, and XORs each block
 * with iv (the previous ciphertext block after the first iteration).
 * The plaintext is left in output[] as a NUL terminated string.
 *
 * Side effect: iv[] is overwritten with the last ciphertext block.
 *
 * The last plaintext byte is treated as a pad count. With a wrong key that
 * byte is garbage, so it is only honoured when it is in the range 1..8 and
 * does not exceed the text length. Otherwise the string is terminated on its
 * final byte, so the write always stays inside the decrypted region.
 *
 * NOTE(review): blocks are always copied 8 bytes at a time, so this assumes
 * ciphertext_len is a multiple of 8 - confirm against the loader.
 */
void decrypt_challenge(unsigned char key[8])
{
	unsigned long sched[16][2];
	unsigned long kh,kl;
	unsigned char tmp[8];
	unsigned char next_iv[8];

	int x,y;
	int pad;

	crunchkey(key,(long *)&kh,(long *)&kl); /* crunch the key down to the 2 long format */
	deskey3(sched,kh,kl,1); /* generate the key schedule */
	x = 0; y = 0;
	while (x < ciphertext_len)
		{
		/* copy the next block of cipher text into the working area 
		   and save a copy for the next IV */
		for (y = x; y < x+8; y++) 
			{
			tmp[y-x] = full_ciphertext[y];
			next_iv[y-x] = full_ciphertext[y];
			}
		/* run the decrypt */
		initial_perm(tmp);
		do_first_round(sched);
		do_middle_rounds(sched);
		do_last_round(sched);
		final_perm(tmp);

		/* xor with the IV or the previous ciphertext */
		for (y = 0; y < 8; y++) tmp[y] ^= iv[y];
		/* copy the next_iv into the iv, for use in the next block */
		for (y = 0; y < 8; y++) iv[y] = next_iv[y];
		/* copy the decrypted block into the output buffer */
		for (y = x; y < x+8; y++) output[y] = tmp[y-x];
		x += 8;
		}

	/* nothing was decrypted: hand back an empty string */
	if (ciphertext_len < 1)
		{
		output[0] = '\0';
		return;
		}

	/* strip off the trailing padding; read the count as an unsigned byte
	   so a value above 0x7f cannot turn into a negative offset */
	pad = (unsigned char)output[ciphertext_len-1];
	if (pad >= 1 && pad <= 8 && pad <= ciphertext_len)
		output[ciphertext_len - pad] = '\0';
	else
		output[ciphertext_len-1] = '\0';	/* implausible padding */
}
/*********************/
/*
 * Advance the global key schedule knd[][] to the next key in Gray code
 * order.
 *
 * Always steps the lowest byte counter b0: gray8[b0] names the key bit that
 * flips, and the matching drkeydiffs[bit] table is XORed into the schedule.
 * When b0 wraps at 256 the next byte counter b1 is stepped the same way
 * (bits 8..15), round 0 is re-run from initial_perm_output_left/right, and
 * its result is cached in round1_output_left/right. When b1 also reaches
 * 256, done_lower is set to tell the caller the 16 bit mini-chunk is done.
 *
 * Globals written: bi, b0, b1, knd, left, right, round1_output_left,
 * round1_output_right, done_lower.
 */
void update_key_schedule()
{
	/*
	This routine updates the key schedule bits controlled by b0, and if
	necessary, b1. It's included because I don't want to write the code
	twice - if we bail out due to a half match, we need to update before
	we can continue. In the final code, doing all this jumping to
	subroutines probably loses us some time, and this code should be 
	inlined.
	*/
	int z;
#ifdef WIN32
	/* addresses of knd and of the selected diff table, for the asm below */
	static unsigned long pks, pkdf;
#endif /* WIN32 */

	/* byte 0, the low order, most swiftly changing bits.*/

	bi = gray8[b0++];/* which bit varies on this key change?*/

#ifdef WIN32
	if (Asmversion)	 // update the key schedule, in fast assembler
		{
		pks = (unsigned long)&knd[0][0];
#if 1

		if (bi == 0)
			{
/*
Here's a real tweaky hack - It's probably not finished by the
time you see this.

Half of the time, it's bit 0 of the key being flipped. We can 
win several percent by putting in special case code for this.

Flipping bit 0 changes 15 of the subkeys, but only one half of
each subkey. Therefore, we can get away with xoring the table
into only half the subkey, for the 14 subkeys we're interested
in (1-14 - 0 & 15 don't contribute)

But wait, there's more! 

The same value is xored into two sub-subkeys in most cases. This
can save us even more cycles, if we were to do xor reg,reg 

  The values seem to be (I have not checked this yet):

  (offset is the byte offset into knd: round*8 + half*4)

key val			offset	used
0,0 00000000 	0
1,0 00080000 	8		x
2,0 00000000 	10
3,0 00000000 	18
4,0 00100000 	20		x
5,0 00000000	28
6,0 00040000 	30		x
7,0 00000000	38
0,1 00000000 	4
1,1 00000000 	c
2,1 40000000 	14		x
3,1 00040000 	1c		x
4,1 00000000 	24
5,1 10000000 	2c		x
6,1 00000000 	34
7,1 04000000	3c		x
8,0 20000000 	40		x
9,0 00000000 	48
a,0 00000000 	50
b,0 40000000 	58		x
c,0 00400000 	60		x
d,0 00000000 	68
e,0 00800000	70		x
f,0 10000000	78		
8,1 00000000 	44
9,1 00080000 	4c		x
a,1 20000000 	54		x
b,1 00000000 	5c
c,1 00000000 	64
d,1 08000000 	6c		x
e,1 00000000 	74
f,1 00000000	7c

  pulling out the non-zero values, in rounds 1-e:

key val			offset	used
3,1 00040000 	1c		x
6,0 00040000 	30		x
1,0 00080000 	8		x
9,1 00080000 	4c		x
4,0 00100000 	20		x
c,0 00400000 	60		x
e,0 00800000	70		x
7,1 04000000	3c		x
d,1 08000000 	6c		x
5,1 10000000 	2c		x
8,0 20000000 	40		x
a,1 20000000 	54		x
2,1 40000000 	14		x
b,0 40000000 	58		x

It seems that xor reg, immed and xor reg, reg both take 1 cycle.
Lets try xoring immediates, and see how it runs - it takes more
bytes but fewer instructions.

If I get this working, it may be able to do the key update in 
21 clocks, instead of the normal 56, about half of the time.

NOTE(review): the original text said this path was "#if 0'd out", but
the guard above is "#if 1", so the special case below is compiled in.
*/
		/* the seven macro calls below cover the 14 non-zero entries
		   listed above, two per call */
		__asm  mov eax, pks;
		__asm clc;

		key0updatemacro(0x1c,0x00040000, 0x30,0x00040000);
		key0updatemacro(0x08,0x00080000, 0x4c,0x00080000);
		key0updatemacro(0x20,0x00100000, 0x2c,0x10000000);
		key0updatemacro(0x70,0x00800000, 0x3c,0x04000000);
		key0updatemacro(0x6c,0x08000000, 0x60,0x00400000);
		key0updatemacro(0x40,0x20000000, 0x54,0x20000000);
		key0updatemacro(0x14,0x40000000, 0x58,0x40000000);

		}
		else
		{
#endif /* closes the "#if 1" that opened the bit 0 special case */
		pkdf = (unsigned long)&drkeydiffs[bi][0][0];

		/* this macro assumes:
		pointer to key schedule in eax,
		pointer to key diff table in ecx
		esi, edi, ebx, edx are for working data 
		i is the number of bytes past the start of the arrays. 
		*/

		/* offsets 0x00..0x70 step through subkeys 0..14, 8 bytes each;
		   subkey 15 is not touched here */

__asm	mov eax, pks;
__asm	mov ecx, pkdf;
__asm	clc; /* this looks superfluous, but improves the pairing */
		keyupdatemacro(0x00);
		keyupdatemacro(0x08);
		keyupdatemacro(0x10);
		keyupdatemacro(0x18);
		keyupdatemacro(0x20);
 		keyupdatemacro(0x28);
		keyupdatemacro(0x30);
	 	keyupdatemacro(0x38);
		keyupdatemacro(0x40);
	 	keyupdatemacro(0x48);
		keyupdatemacro(0x50);
		keyupdatemacro(0x58);
		keyupdatemacro(0x60);
		keyupdatemacro(0x68);
		keyupdatemacro(0x70);

#if 1
		}
#endif /* closes the "else" opened under the first "#if 1" */
		}
	else
		{ /* generic 'C' version */
#endif /* WIN32 */
		/* only subkeys 1..14 are updated for a b0 step; subkeys 0 and 15
		   are left alone (see the b1 step below and test_candidate_key) */
		knd[1][0]  ^= drkeydiffs[bi][ 1][0];
		knd[1][1]  ^= drkeydiffs[bi][ 1][1];
		knd[2][0]  ^= drkeydiffs[bi][ 2][0];
		knd[2][1]  ^= drkeydiffs[bi][ 2][1];
		knd[3][0]  ^= drkeydiffs[bi][ 3][0];
		knd[3][1]  ^= drkeydiffs[bi][ 3][1];
		knd[4][0]  ^= drkeydiffs[bi][ 4][0];
		knd[4][1]  ^= drkeydiffs[bi][ 4][1];
		knd[5][0]  ^= drkeydiffs[bi][ 5][0];
		knd[5][1]  ^= drkeydiffs[bi][ 5][1];
		knd[6][0]  ^= drkeydiffs[bi][ 6][0];
		knd[6][1]  ^= drkeydiffs[bi][ 6][1];
		knd[7][0]  ^= drkeydiffs[bi][ 7][0];
		knd[7][1]  ^= drkeydiffs[bi][ 7][1];
		knd[8][0]  ^= drkeydiffs[bi][ 8][0];
		knd[8][1]  ^= drkeydiffs[bi][ 8][1];
		knd[9][0]  ^= drkeydiffs[bi][ 9][0];
		knd[9][1]  ^= drkeydiffs[bi][ 9][1];
		knd[10][0] ^= drkeydiffs[bi][10][0];
		knd[10][1] ^= drkeydiffs[bi][10][1];
		knd[11][0] ^= drkeydiffs[bi][11][0];
		knd[11][1] ^= drkeydiffs[bi][11][1];
		knd[12][0] ^= drkeydiffs[bi][12][0];
		knd[12][1] ^= drkeydiffs[bi][12][1];
		knd[13][0] ^= drkeydiffs[bi][13][0];
		knd[13][1] ^= drkeydiffs[bi][13][1];
		knd[14][0] ^= drkeydiffs[bi][14][0];
		knd[14][1] ^= drkeydiffs[bi][14][1];
#ifdef WIN32
	}
#endif /* WIN32 */
	
	/* unrolling the loop seems to help quite a bit
	for (z = 1; z<15; z++)
		{
		knd[z][0] ^= drkeydiffs[bi][z][0];
		knd[z][1] ^= drkeydiffs[bi][z][1];
		}
	*/
	if (b0 == 256) 		
		{
		/*
		this code is entered when it's time to update the next higher
		byte - defined by b1.
		*/
		b0=0;
		bi = gray8[b1++]+8;
		/* a b1 step updates subkeys 0..14, including the round 0 subkey */
		for (z = 0; z<15; z++)
			{
			knd[z][0] ^= drkeydiffs[bi][z][0];
			knd[z][1] ^= drkeydiffs[bi][z][1];
			}
		/* the bit reshuffle in crunchkey means that we dont need
		to run the first round until the higher bits change */
		
		left  = initial_perm_output_left;
		right = initial_perm_output_right;
		do_first_round(knd);
		round1_output_left = left;
		round1_output_right = right;
		
		/* both low bytes have wrapped: the 16 bit mini-chunk is finished */
		if (b1 == 256) done_lower = 1;
		}	

}
/*********************/
/*
 * Step through the most swiftly changing 16 bits of the key chunk under
 * test (a 'mini-chunk' of 65536 keys).
 *
 * If entered with the key schedule already set up and b0/b1 above zero, it
 * carries on from there to the end of the mini-chunk.
 *
 * Round 0 is not run here: its result for the current group of 256 keys is
 * cached in round1_output_left/right (refreshed by update_key_schedule()
 * whenever b0 wraps), so each key only costs the middle rounds.
 *
 * Returns:
 *   0: no match found, mini-chunk finished (done_lower was set)
 *   1: half match found - the caller must check for a full match. The key
 *      schedule has NOT been advanced past the matching key in this case.
 */
int do_lower_16()
{
#ifdef TESTGRAY
	unsigned long key_hi, key_lo;	/* key recovered from the schedule */
#endif /* TESTGRAY */

	done_lower = 0;
	do
	{
		/* start from the cached output of round 0 */
		right = round1_output_right;
		left = round1_output_left;

		do_middle_rounds(knd);	/* the rounds between the first and the last */

#ifdef TESTGRAY
		/* tally which low 16 bit key value was just visited */
		ks_to_key(knd, &key_hi, &key_lo);
		q[key_lo & 0xffff]++;
		count++;
#endif /* TESTGRAY */

		checksum ^= left;

		/*
		 * After the middle rounds, left must equal the predicted value
		 * in final_perm_input_right for the key to be a candidate. If it
		 * does not, the last round need not be run at all.
		 */
		if (left == final_perm_input_right)
			return 1;	/* half match: caller runs the final round */

		update_key_schedule();	/* on to the next key */
	} while (done_lower == 0);

	return 0;
}
/*********************/
/*
 * Prepare the global search state for the start of a chunk.
 *
 * chunk: chunk number, i.e. the high part (sh) of the crunched key. The
 *        parameter is spelled out as int - the original old-style
 *        definition relied on implicit int, which is not valid C99.
 *
 * Builds a complete key schedule for (chunk, 0) in knd, runs round 0 and
 * caches its result in round1_output_left/right, clears done, done_lower,
 * count and chkpntcnt, then writes a checkpoint.
 *
 * The Gray counters b0..b3, checksum and half_matches_found are NOT reset
 * here; test_chunks() clears them before calling do_chunk().
 *
 * NOTE(review): unlike update_key_schedule(), left/right are not reloaded
 * from initial_perm_output_left/right before do_first_round(), so round 0
 * runs on whatever left/right currently hold - confirm this is intended.
 */
void setup_to_start_chunk(int chunk)
{
	sh = chunk;
	sl = 0;

	if (verbose_flag)
		printf("Preparing to start chunk %06x.\n",sh);

	/* deskey3 generates a complete key schedule from the 2 long format */
	deskey3(knd,sh,sl,1);

	/* run round 0 once and remember its output */
	do_first_round(knd);
	round1_output_left = left;
	round1_output_right = right;

	done = 0;
	done_lower = 0;
	count = 0;

	chkpntcnt = 0;
	checkpoint_status();
}
/*********************/
/*
 * Resume a chunk from the checkpoint file.
 *
 * Returns 0 when restore_checkpoint() reports nothing was restored;
 * otherwise re-runs round 0 on the current key schedule, caches the result
 * in round1_output_left/right, clears the progress flags, writes a fresh
 * checkpoint and returns 1.
 */
int setup_to_continue_chunk()
{
	if (!restore_checkpoint())
		return 0;

	/* rebuild the cached round 0 output for the restored schedule */
	do_first_round(knd);
	round1_output_left = left;
	round1_output_right = right;

	done = 0;
	done_lower = 0;
	count = 0;
	chkpntcnt = 0;

	checkpoint_status();
	return 1;
}
/*********************/
/*
 * Check a half match reported by do_lower_16() for a full match.
 *
 * Recovers the crunched key (sh, sl) from the current key schedule,
 * rebuilds the full schedule from it, runs the last round and compares the
 * result with final_perm_input_left.
 *
 * On a full match the key is printed and recorded, the challenge is
 * decrypted and printed, and the program exits - the function does not
 * return in that case.
 *
 * Otherwise the key is appended to half_match[], the key schedule is
 * advanced to the next key (do_lower_16 returned without doing so) and 0
 * is returned. The original fell off the end of this int function here.
 *
 * NOTE(review): half_matches_found is not checked against the capacity of
 * half_match[], which is declared outside this file - confirm the bound.
 */
int test_candidate_key()
{
	int x;

	/* convert the ks to a crunched key, and back again.
	this will generate the round 15 subkey correctly. */
	ks_to_key(knd,(unsigned int*)&sh,(unsigned int *)&sl);
	deskey3(knd,sh,sl,1);
	uncrunchkey(pkey,sh,sl);

	do_last_round(knd);
	if (final_perm_input_left == right)
		{
		/* we've found the target key */
		printf("key is: ");put8(pkey); printf("\n");
		printf(" YOU FOUND THE KEY!!\n");
		done_lower = 1; done = 1;
		record_output(1);
		decrypt_challenge(pkey);
		printf("Decrypted data: %s\n",output);
		exit(0);
		return 1;	/* not reached */
		}

	/* only a half match: remember it for the report */
	for (x = 0; x < 8; x++)
		half_match[half_matches_found][x] = pkey[x];
	half_matches_found++;

	/* we bailed out of do_lower_16 without updating the
	key schedule. We have to go on after a half match, so
	we need to update the key schedule */
	update_key_schedule();
	return 0;
}

/*********************/
/*
 * Step through a 'chunk' - a 32 bit segment of the key space selected by
 * sh, the high part of the crunched key.
 *
 * For every setting of the upper 16 Gray-coded bits (counters b2 and b3)
 * the lower 16 bits are searched by do_lower_16() or, on WIN32 with
 * Asmversion set, by x86_do_lower_16(). This is where the actual des key
 * checking is driven from.
 *
 * Fixes relative to the original:
 *  - the result of do_lower_16() was discarded, so 'status' was read
 *    uninitialized and half matches found by the C routine were never
 *    passed to test_candidate_key();
 *  - after a half match the C routine is now re-entered so the rest of the
 *    mini-chunk is searched. do_lower_16() returns as soon as it sees a
 *    half match, and test_candidate_key() advances the schedule past it.
 *
 * NOTE(review): the assembler path still makes a single call per
 * mini-chunk, as before; whether x86_do_lower_16() needs the same resume
 * loop depends on its contract, which is not visible here.
 *
 * Always returns 0 (a found key ends the program in test_candidate_key).
 */
int do_upper_16()
{
unsigned int z;
unsigned int status = 0;

if (verbose_flag)
	printf("%06x %02x\n",sh,b3);

while (done == 0)
	{
#ifndef TESTGRAY
#ifdef WIN32
	if (Asmversion)	 /* DO_LOWER_16 in fast assembler	*/
		{
		status = x86_do_lower_16();
		if (status == 1)
			test_candidate_key();
		}
	else
#endif /* WIN32 */
		{
		while ((status = do_lower_16()) == 1)
			{
			/* records the half match (or exits on a full match) and
			   steps the schedule to the next key */
			test_candidate_key();
			/* that step may have been the last one of the mini-chunk */
			if (done_lower)
				break;
			}
		}
#else
 	status = 0;
	done_lower = 1;
#endif /* TESTGRAY */

	done_lower = 0;		/* setup for next lower_16 */
	b1=0;				/* ditto */
	/* update b2 */
	bi = gray8[b2++]+16;	/* which bit changes? */
#ifdef TESTGRAY
	ks_to_key(knd,&qh,&ql);
	ql = ql & 0xffff0000;
	ql = ql >>16;
	q[ql]++;
#endif /* testgray */		

	for (z = 0; z<16; z++)	/* update that bit in */
		{					/* the key schedule */
		knd[z][0] ^= drkeydiffs[bi][z][0];
		knd[z][1] ^= drkeydiffs[bi][z][1];
		}
	if (b2 == 256)		/* time to update b3 */
		{
		/* byte 3 */
		b2=0;
		
		bi = gray8[b3++]+24;
		if (verbose_flag)
			printf("%06x %02x\n",sh,b3);

		for (z = 0; z<16; z++)
			{
			knd[z][0] ^= drkeydiffs[bi][z][0];
			knd[z][1] ^= drkeydiffs[bi][z][1];
			}
		
		if (b3 == 256) 
			{
			/* all four byte counters have wrapped: chunk complete */
			done = 1;
			b3 = 0;
			}
		}
	/* write a checkpoint every chkpnttime mini-chunks */
	if (chkpntcnt++ >= chkpnttime)
		{
		checkpoint_status();
		chkpntcnt = 0;
		}
	} /* end of loop */
	record_output(0);
	return 0;
}
/*********************/
/*
 * Search one complete chunk: set up the key schedule for it, then walk
 * all of its keys. The result of do_upper_16() is not used.
 */
void do_chunk(unsigned long chunk)
{
	setup_to_start_chunk(chunk);
	(void)do_upper_16();
}

/*********************/
/*
 * Print the command line help to stdout and terminate the program with
 * exit status 0. Does not return.
 */
void usage()
{
	static const char *const help_lines[] = {
		"Usage: deskr -h -t\n",
		"  -h         (help) print this usage message and exit.\n",
		"  -t         (test) self-test DES function, run speed test, and exit.\n",
		"  -c nnnnnn  (check) starting at chunk xxxxxx (hex).\n",
		"  -k xxxxxxxxxxxxxxxx (key) to try on current data.\n",
		"  -q         (quiet) Don't print banner at startup\n",
		"  -i		 (info) Verbose mode - show data being worked.\n",
		"Starting with no arguments or -q restarts where you left off, or\n",
		"picks a random starting point if this is the first time.\n"
	};
	size_t line;

	for (line = 0; line < sizeof help_lines / sizeof help_lines[0]; line++)
		fputs(help_lines[line], stdout);

	exit (0);
}
/*********************/
/*
 * Parse the command line into the global option flags.
 *
 * argc, argv: as passed to main().
 *
 * Resets print_usage, run_tests, start_this_chunk, run_quietly, test_a_key
 * and verbose_flag, copies argv[0] into firstarg, then handles:
 *   -t            run_tests = 1
 *   -h            print usage and exit
 *   -c <hex>      sh = chunk number (000000..ffffff), start_this_chunk = 1
 *   -q            run_quietly = 1
 *   -k <hexkey>   key_to_test = packed key, test_a_key = 1
 *   -i            verbose_flag = 1
 * Anything else prints a message followed by the usage text; usage() ends
 * the program.
 *
 * Fixes relative to the original:
 *  - "-c" or "-k" as the last argument passed a NULL pointer to sscanf;
 *  - the key was read with an unbounded "%s"; it is now limited to the 16
 *    hex digits shown in the usage text, and longer input is rejected;
 *  - the chunk number is read into an unsigned int, as "%x" requires, so a
 *    value with the top bit set can no longer slip past the range check;
 *  - the switch character is converted to unsigned char before tolower().
 *
 * NOTE(review): firstarg and unpacked_key_to_test are declared elsewhere;
 * this assumes unpacked_key_to_test holds at least 17 bytes and firstarg
 * is large enough for argv[0] - confirm against deskr.h.
 */
void parse_commandline(int argc, char *argv[ ])
{
int x;
int c;
int status;
unsigned int num;

print_usage = 0;
run_tests = 0;
start_this_chunk = 0;
run_quietly = 0;
test_a_key = 0;
verbose_flag = 0;

if (argc > 0 && argv[0] != NULL)
	strcpy((char*)firstarg,argv[0]);

for (x = 1; x < argc && argv[x] != NULL; x++)
	{
	if (argv[x][0] != '-')
		{
		printf("Unrecognized command line argument.\n");
		usage();
		return;
		}

	c = tolower((unsigned char)argv[x][1]);
	switch (c)
		{
	case 't': 
		run_tests = 1;
		break;
	case 'h': 
		usage();
		return;
	case 'c':
		x++;
		num = 0;
		status = (argv[x] != NULL) ? sscanf(argv[x],"%x",&num) : 0;
		if ((status != 1) || (num > 0xffffff))
			{
			printf ("-c requires chunk number, range 000000 to ffffff.\n");
			usage();
			return;
			}
		sh = num;
		start_this_chunk = 1;
		break;
	case 'q':
		run_quietly = 1;
		break;
	case 'k':
		x++;
		status = 0;
		/* a DES key is 8 bytes, i.e. at most 16 hex digits */
		if (argv[x] != NULL && strlen(argv[x]) <= 16)
			status = sscanf(argv[x],"%16s",(char *)unpacked_key_to_test);
		if (status != 1)
			{
			printf("-k requires a key of up to 16 hex digits.\n");
			usage();
			return;
			}
		pack_hex_string((char *)unpacked_key_to_test,(char *)key_to_test,8);

		test_a_key = 1;
		break;
	case 'i':
		verbose_flag = 1;
		break;
	default:
		printf("Unrecognized switch: -%c\n",c);
		usage();
		return;
		}
	}
}
/*********************/
/*
 * Return a pseudorandom 24 bit number, to be used as a random starting
 * chunk.
 *
 * This does not need to be cryptographically strong - a reasonably smooth
 * spread of starting points is enough, so the system rand() will do.
 *
 * RAND_MAX may be as small as 32767 (15 bits), so rather than combining
 * whole results, rand() is called 24 times and one bit (mask 0x20) of each
 * result supplies one bit of the answer, lowest bit first.
 */
unsigned long get_random_chunk()
{
	unsigned long chunk = 0;
	unsigned long mask = 1;
	int remaining = 24;

	while (remaining-- > 0)
		{
		if ((rand() & 0x20) != 0)
			chunk |= mask;
		mask <<= 1;
		}

	return chunk;
}
/*********************/
/*
 * Test chunks one after another, starting at start_chunk and wrapping
 * from chunk 0xffffff back to 0. Loops forever; the program ends from
 * elsewhere (test_candidate_key() exits when the key is found).
 *
 * Before each chunk the Gray counters b0..b3, half_matches_found and
 * checksum are cleared and a checkpoint is written.
 */
void test_chunks(unsigned long start_chunk)
{
	for (sh = start_chunk; ; )
		{
		/* fresh per-chunk state */
		half_matches_found = 0;
		b0 = 0;
		b1 = 0;
		b2 = 0;
		b3 = 0;
		checksum = 0;

		checkpoint_status();
		chkpntcnt = 0;

		do_chunk(sh);

		/* next chunk; chunk numbers are 24 bits wide */
		if (++sh == 0x1000000)
			sh = 0;
		}
}
/*********************/
/*
 * Carry on where the previous run stopped.
 *
 * If a checkpoint can be restored, the search resumes with chunk sh and
 * then moves on to the following chunks, wrapping after 0xffffff. If not,
 * a random chunk is picked and test_chunks() takes over from there.
 * Neither branch returns.
 *
 * Fix: 'chunk' is an unsigned long but was printed with "%06x"; it is now
 * printed with "%06lx".
 *
 * NOTE(review): do_chunk() calls setup_to_start_chunk(), which rebuilds
 * the key schedule for the start of the chunk, so the state restored from
 * the checkpoint looks like it is overwritten before use - confirm.
 */
void continue_testing_chunks()
	{
	unsigned long chunk;

	if (setup_to_continue_chunk())
		{
		if (verbose_flag)
			printf("Continuing chunk %06x from checkpoint file.\n",sh);
		while (1)
			{
			do_chunk(sh);
			sh++;
			if (sh == 0x1000000) sh = 0;	/* chunk numbers are 24 bits */
			}
		}
	else
		{
		chunk = get_random_chunk();
		if (verbose_flag)
			printf("Picking random chunk to start: %06lx.\n",chunk);
		test_chunks(chunk);
		}
	}
/*********************/


/*
 * Program entry point.
 *
 * Order of work:
 *   1. parse the command line and run startup_functions();
 *   2. with TESTGRAY defined, run the Gray code coverage self check;
 *   3. -t: run the DES self test and the speed test, then exit;
 *   4. load the challenge data and the ident file, exiting if either is
 *      missing;
 *   5. -k: decrypt with the given key, report the result and exit;
 *   6. otherwise search: from the chunk given with -c, or from the
 *      checkpoint / a random chunk.
 *
 * Fix: main was declared 'void main'; it now has the standard int return
 * type and returns 0. The search loops do not return, so the final
 * statements are only reached if that ever changes.
 */
int main(int argc, char *argv[ ])
{
int x;

#ifdef TESTGRAY
	int z;
#endif /* TESTGRAY */

/* process command line */
parse_commandline(argc, argv);

/* set up std stuff */
startup_functions();

#ifdef TESTGRAY
/* print_ks(drkeydiffs[0]);*/
for (x = 0; x<65536; x++) q[x] = 0;
setup_to_start_chunk(0x4533);
chkpnttime = 100000;
/* compile with TESTGRAY and one of the following lines
   uncommented to test if we're checking every key */
/*do_lower_16();  */
x86_do_lower_16();
/* do_upper_16(); */
/* every one of the 65536 slots must have been hit exactly once */
z = 0;
for (x = 0; x<65536; x++)
	{
	if (q[x] != 1) 
		{
		z++;
	/*	printf("%d\n",x);	 */
		}
	}
if (z == 0)
	printf("testgray ok \n");
else
	printf("testgray not ok\n");
#endif /* TESTGRAY */
	if (run_tests)
		{
		if (rivest_test())
			printf("DES encryption/decryption operating correctly.\n");
		else
			printf("DES encryption/decryption test failed.\n");
		time_check(1);
		 exit(0);
		}
	if (get_test_data() == 0)
		{
		printf("No data file found. Expected %s\n",deskr_in);
		exit(0);
		}
	if (get_ident() == 0)
		{
		printf("no ident file found\n");
		exit(0);
		}
	if (test_a_key)
		{
		decrypt_challenge(key_to_test);
		printf("Decrypt: %s\n",output);
		/* the plaintext is accepted when it starts with "The unkn" */
		if (!strncmp((const char*)output,"The unkn",8))
			{
			printf("Decrypt OK: %s\n",output);
			for (x=0; x<8; x++) pkey[x] = key_to_test[x];
			record_output(1);
			}
		else
			printf("Decrypt not OK\n");
		exit(0);
		}
	sl = 0;
	time_check(0); /* call with no output to set chkpnttime */
	if (start_this_chunk)
		test_chunks(sh);
	continue_testing_chunks();
	checkpoint_status();
	return 0;
	}

/* times

home system. 80 MHz Pentium Overdrive chip.
keygen and des decrypt
			iterations		ms
deskey		65536		   36220 original, unoptimized key gen.
deskey3		65536			8120 denovo from drkeydiff fanouts
deskey4		65536			2200 xor only diffs of iterated keys
dk4 +gray	65536			1810 first stab at gray code
gray8		65536			1650 iterating gray 8 in first 8 bits.
gray16		65536			1760 iterating gray code in first 16 bits.
no key gen	65536			1320 time for 'c' des decrypt only.
asm 1		65536			 736 1st crude use of asm for rounds only
no key gen  65536			 423 asm 1 time, with guts of update_key gone		
*/
