/* diamond.c - Encryption designed to exceed DES in security.
   This file and the Diamond and Diamond Lite Encryption Algorithms
   described herein are hereby dedicated to the Public Domain by the
   author and inventor, Michael Paul Johnson.  Feel free to use these
   for any purpose that is legally and morally right.  The names
   "Diamond Encryption Algorithm" and "Diamond Lite Encryption
   Algorithm" should only be used to describe the algorithms described
   in this file, to avoid confusion.

   Disclaimers:  the following comes with no warranty, expressed or
   implied.  You, the user, must determine the suitability of this
   information to your own uses.  You must also find out what legal
   requirements exist with respect to this data and programs using
	 it, and comply with whatever valid requirements exist.

	 Adaption into a Windows DLL by Andy Brown 1994. Viewing this file
   with an 80 column display is not a good idea.
	 Changelog:

	 .	Ccitt32Table precomputed when the DLL is first loaded
	 .	A constant expression lifted out of the innermost key generation
			loops in makeonebox() and set_diamond_key()
	 .	set_diamond_key() returns an error status code to the caller
	 .	set_diamond_key() takes the cipher type as an argument rather than
			the raw block size
	 .  now uses a des-like "key-scheduling" mechanism to store the internal
			key and other cipher information in a programmer-supplied structure.
			This modification added to cope with possible multi-tasking clashes.
   .  A constant expression lifted out of the loop in substitute(), isubst()
*/


#include <windows.h>
#include <windowsx.h>
#include "diamond.h"

/* CRC definitions */

#define CRC_MASK           0xFFFFFFFFL
#define CRC32_POLYNOMIAL   0xEDB88320L		/* polynomial used by BuildCRCTable() */

/* crc32(crc,c): fold the byte c into the running 32-bit value crc using the
   Ccitt32Table lookup table and yield the updated value.

   Every use of a macro argument is parenthesised so that an argument such as
   "a|b" or "n+1" is combined as a whole instead of being split by operator
   precedence inside the expansion.  crc is expanded twice and therefore must
   be free of side effects; c is expanded exactly once, so an argument like
   key[keyindex++] is evaluated a single time. */
#define crc32(crc,c) ((((crc)>>8)&0x00FFFFFFL)^(Ccitt32Table[(int)((int)(crc)^(c))&0xFF]))

/* static variables */

/* The first four variables are the key-stream state read and advanced by
   keyrand(); set_diamond_key() loads them before it builds the substitution
   boxes.  They live at file scope, so every key setup uses the same copies.
   NOTE(review): overlapping key setups would disturb each other's stream -
   confirm that callers never run set_diamond_key() concurrently. */

/* External key bytes currently being expanded. */
static LPBYTE key;
/* Number of bytes in key[]. */
static WORD keysize;
/* Index of the next key[] byte that keyrand() will consume. */
static WORD keyindex;
/* Running CRC accumulator from which keyrand() draws its output bits. */
static DWORD accum;
/* Byte-indexed CRC lookup table, filled once by BuildCRCTable(). */
static DWORD Ccitt32Table[256];

/* static function prototypes */

/* DLL entry point; unlike the rest of this list it is not static. */
int FAR PASCAL LibMain(HANDLE hInstance,WORD wDataSeg,WORD wHeapSize,LPSTR lpszCmdLine);
/* Key expansion helpers called (directly or indirectly) by set_diamond_key(). */
static WORD keyrand(WORD max_value);
static VOID makeonebox(WORD i,WORD j,LPDIAMONDKEY dmkey);
/* Bit permutations and their inverses for 16-byte blocks. */
static VOID permute(LPBYTE x, LPBYTE y);
static VOID ipermute(LPBYTE x,LPBYTE y);
/* Bit permutations and their inverses for 8-byte (lite) blocks. */
static VOID permute_lite(LPBYTE a,LPBYTE b);
static VOID ipermute_lite(LPBYTE b,LPBYTE a);
/* Per-round byte substitution through dmkey->s and its inverse through dmkey->si. */
static VOID substitute(WORD round,LPBYTE x,LPBYTE y,LPDIAMONDKEY dmkey);
static VOID isubst(WORD round, LPBYTE x, LPBYTE y,LPDIAMONDKEY dmkey);
/* Fills Ccitt32Table; called from LibMain(). */
static VOID BuildCRCTable(VOID);


/******************************/
/* LibMain DLL initialisation */
/******************************/

/* Library entry point.  Calls UnlockData(0) when the DLL was given a local
   heap (wHeapSize non-zero), then precomputes the CRC lookup table that key
   setup relies on.  Always returns 1.  hInstance, wDataSeg and lpszCmdLine
   are accepted but not used. */
int FAR PASCAL LibMain(HANDLE hInstance,WORD wDataSeg,WORD wHeapSize,LPSTR lpszCmdLine)
{
	if(wHeapSize!=0)
	{
		UnlockData(0);
	}

	BuildCRCTable();

	return 1;
}


/* Draw the next pseudorandom value in the range 0..max_value (inclusive)
   from the key stream described by key[], keysize, keyindex and accum.

   Each attempt folds one more key byte into accum with crc32(); whenever the
   end of the key is reached the index restarts at 0 and the two bytes of
   keysize are folded in as well.  The low bits of accum (just enough to
   cover max_value) form the candidate.  Candidates above max_value are
   discarded and another key byte is consumed; after more than 97 attempts an
   out-of-range candidate is instead reduced by max_value, which bounds the
   loop at the cost of a negligible bias.

   A max_value of 0 returns 0 immediately and consumes no key material. */
static WORD keyrand(WORD max_value)
{
WORD candidate,attempts,remaining;
DWORD mask;

	if(max_value==0)
		return 0;

	/* Build the smallest all-ones mask that covers every bit of max_value. */
	mask=0L;
	remaining=max_value;
	while(remaining>0)
	{
		mask=(mask<<1)|1L;
		remaining=remaining>>1;
	}

	attempts=0;
	for(;;)
	{
		/* Mix the next key byte into the accumulator. */
		accum=crc32(accum,key[keyindex++]);
		if(keyindex>=keysize)
		{
			/* Key exhausted: recycle it and stir in its length. */
			keyindex=0;
			accum=crc32(accum,(keysize & 0xFF));
			accum=crc32(accum,((keysize>>8) & 0xFF));
		}

		candidate=(WORD)(accum & mask);
		attempts++;

		/* Don't loop forever: past the retry limit, pull the value into range. */
		if((candidate>max_value) && (attempts>97))
			candidate-=max_value;

		if(candidate<=max_value)
			break;
	}

	return candidate;
}

/* Fill one 256-byte substitution box of dmkey->s with a key-dependent
   permutation of the values 0..255.

   i      round number the box belongs to
   j      index of the box within that round
   dmkey  key schedule; roundsize must be set and s must point at the table

   The values 255 down to 0 are placed one at a time.  For each value,
   keyrand() picks which of the still-empty positions (counted from the
   start of the box) receives it, so every position is written exactly
   once. */
static VOID makeonebox(WORD i,WORD j,LPDIAMONDKEY dmkey)
{
static BOOL taken[256];			/* TRUE once a position holds its value */
WORD slot,skipped,target,base;
int value;

	for(slot=0;slot<256;slot++)
		taken[slot]=FALSE;

	/* Offset of this box inside the table; invariant for the whole fill. */
	base=(dmkey->roundsize*i)+(j<<8);

	value=255;
	while(value>=0)
	{
		/* value+1 positions are still empty; choose one of them by rank. */
		target=keyrand(value);

		/* Walk to the empty position with that rank. */
		slot=0;
		for(skipped=0;;skipped++)
		{
			while(taken[slot])
				slot++;
			if(skipped==target)
				break;
			slot++;
		}

		dmkey->s[base+slot]=value;
		taken[slot]=TRUE;
		value--;
	}
}

/* Generate the internal key schedule in *dmkey from an external key by
   filling the substitution box array s (and, on request, its inverse si).
   It DOES take a bit of time.

   external_key  key bytes to expand
   key_size      number of bytes in external_key
   rounds        number of cipher rounds (at least 3 for DIAMOND_LITE, at
                 least 5 otherwise; rounds * block size may not exceed 255)
   invert        non-zero to also build the inverse boxes needed for
                 decryption
   dmkey         caller-supplied structure that receives the schedule
   type          DIAMOND_LITE selects 8-byte blocks; any other value selects
                 16-byte blocks

   Returns 0 on success, DERR_ROUNDS for an unacceptable round count, or
   DERR_MEMORY when a table cannot be allocated.

   On every return path dmkey->s and dmkey->si are either valid tables or
   NULL, so diamond_done() may always be called on the structure afterwards.
   Any pointers already stored in dmkey are overwritten without being freed;
   call diamond_done() before re-keying a structure.

   NOTE(review): key_size is not validated here.  With key_size==0 keyrand()
   still reads external_key[0]; confirm callers always pass a non-empty key. */
UINT FAR PASCAL _export set_diamond_key(LPBYTE external_key,WORD key_size,WORD rounds,BOOL invert,
																				LPDIAMONDKEY dmkey,UINT type)
{
WORD i,j,k,roundmult,tablesize;

	/* Start from a known state so that no error return leaves stale or
	   uninitialised table pointers behind for diamond_done() to free. */
	dmkey->s=NULL;
	dmkey->si=NULL;

	dmkey->numrounds=rounds;
	if(type==DIAMOND_LITE)
	{
		dmkey->blocksize=8;
		dmkey->roundsize=2048U;
		if(dmkey->numrounds<3)
			return DERR_ROUNDS;
	}
	else
	{
		dmkey->blocksize=16;
		dmkey->roundsize=4096U;
		if(dmkey->numrounds<5)
			return DERR_ROUNDS;
	}

	/* Same limit as numrounds*blocksize>255, but expressed as a division so
	   that a very large round count cannot wrap the product and slip through. */
	if(dmkey->numrounds>255U/dmkey->blocksize)
		return DERR_ROUNDS;

	/* The limit above keeps the table size below 64K, so it fits in a WORD. */
	tablesize=dmkey->numrounds*dmkey->roundsize;

	if((dmkey->s=(LPBYTE)GlobalAllocPtr(GMEM_MOVEABLE,tablesize))==NULL)
		return DERR_MEMORY;

	/* Prime the key stream that keyrand() draws from. */
	key=external_key;
	keysize=key_size;
	keyindex=0;
	accum=0xFFFFFFFFL;

	for(i=0;i<dmkey->numrounds;i++)
	{
		for(j=0;j<dmkey->blocksize;j++)
			makeonebox(i,j,dmkey);
	}

	/* Key expansion is finished: drop the reference to the caller's key and
	   the key-derived accumulator instead of leaving them in static storage. */
	key=NULL;
	keysize=0;
	keyindex=0;
	accum=0L;

	if(invert)
	{
		/* Fill the inverse substitution box array si.  It is not necessary to
		   do this unless the decryption mode is used. */
		if((dmkey->si=(LPBYTE)GlobalAllocPtr(GMEM_MOVEABLE,tablesize))==NULL)
		{
			/* Wipe and release the forward table, and clear the pointer so a
			   later diamond_done() does not free it a second time. */
			for(k=0;k<tablesize;k++)
				dmkey->s[k]=0;
			(VOID)GlobalFreePtr(dmkey->s);
			dmkey->s=NULL;
			return DERR_MEMORY;
		}
		for(i=0;i<dmkey->numrounds;i++)
		{
			for(j=0;j<dmkey->blocksize;j++)
			{
				/* Offset of box j of round i; invariant for the inner loop. */
				roundmult=(dmkey->roundsize*i)+(j<<8);
				for(k=0;k<256;k++)
					*(dmkey->si+roundmult+*(dmkey->s+roundmult+k))=k;
			}
		}
	}
	return 0;
}

/* Bit permutation for 16-byte blocks; x and y must be different buffers.

   This procedure is designed to make each bit of the output dependent on as
   many bytes of the input as possible, especially after repeated
   application.  Output byte n takes bit 0 from input byte n, bit 1 from
   input byte n+1, and so on up to bit 7 from input byte n+7, with the byte
   index wrapping around from 15 to 0. */
static VOID permute(LPBYTE x,LPBYTE y)
{
int out,bit;
int gathered;

	for(out=0;out<16;out++)
	{
		gathered=0;
		for(bit=0;bit<8;bit++)
			gathered|=x[(out+bit)&15]&(1<<bit);
		y[out]=gathered;
	}
}

/* Inverse of permute() for 16-byte blocks; x and y must be different
   buffers.

   Output byte n takes bit 0 from input byte n, bit 1 from input byte n-1,
   and so on down to bit 7 from input byte n-7, with the byte index wrapping
   around from 0 to 15. */
static VOID ipermute(LPBYTE x,LPBYTE y)
{
int out,bit;
int gathered;

	for(out=0;out<16;out++)
	{
		gathered=0;
		for(bit=0;bit<8;bit++)
			gathered|=x[(out+16-bit)&15]&(1<<bit);	/* +16 keeps the index non-negative */
		y[out]=gathered;
	}
}

/* Bit permutation for 8-byte blocks, reading a and writing b.

   This procedure is designed to make each bit of the output dependent on as
   many bytes of the input as possible, especially after repeated
   application.  Output byte n takes bit k from input byte n+k, with the
   byte index wrapping around from 7 to 0.  The eight contributions occupy
   distinct bit positions, so combining them with OR gives the same result
   as adding them. */
static VOID permute_lite(LPBYTE a,LPBYTE b)
{
int out,bit;
int gathered;

	for(out=0;out<8;out++)
	{
		gathered=0;
		for(bit=0;bit<8;bit++)
			gathered|=a[(out+bit)&7]&(1<<bit);
		b[out]=gathered;
	}
}

/* Inverse of permute_lite() for 8-byte blocks, reading b and writing a.

   Output byte n takes bit k from input byte n-k, with the byte index
   wrapping around from 0 to 7.  The eight contributions occupy distinct bit
   positions, so combining them with OR gives the same result as adding
   them. */
static VOID ipermute_lite(LPBYTE b,LPBYTE a)
{
int out,bit;
int gathered;

	for(out=0;out<8;out++)
	{
		gathered=0;
		for(bit=0;bit<8;bit++)
			gathered|=b[(out+8-bit)&7]&(1<<bit);	/* +8 keeps the index non-negative */
		a[out]=gathered;
	}
}

/* Apply the forward substitution boxes of one round to a block.

   round  round whose boxes are used
   x      input block (dmkey->blocksize bytes)
   y      output block (dmkey->blocksize bytes)
   dmkey  key schedule holding the boxes in s

   Byte n of the block is looked up in box n of the round; each box is 256
   bytes long. */
static VOID substitute(WORD round,LPBYTE x,LPBYTE y,LPDIAMONDKEY dmkey)
{
WORD pos,roundbase;
LPBYTE boxes;

	/* Offset of the round's first box; computed once, outside the loop. */
	roundbase=dmkey->roundsize*round;
	boxes=dmkey->s+roundbase;

	pos=0;
	while(pos<dmkey->blocksize)
	{
		y[pos]=boxes[(pos<<8)+x[pos]];
		pos++;
	}
}

/* Apply the inverse substitution boxes of one round to a block.

   round  round whose boxes are used
   x      input block (dmkey->blocksize bytes)
   y      output block (dmkey->blocksize bytes)
   dmkey  key schedule holding the inverse boxes in si

   Byte n of the block is looked up in inverse box n of the round; each box
   is 256 bytes long. */
static VOID isubst(WORD round,LPBYTE x,LPBYTE y,LPDIAMONDKEY dmkey)
{
WORD pos,roundbase;
LPBYTE boxes;

	/* Offset of the round's first box; computed once, outside the loop. */
	roundbase=dmkey->roundsize*round;
	boxes=dmkey->si+roundbase;

	pos=0;
	while(pos<dmkey->blocksize)
	{
		y[pos]=boxes[(pos<<8)+x[pos]];
		pos++;
	}
}

/* Encrypt one 16-byte block.

   x      plaintext block
   y      receives the ciphertext block
   dmkey  key schedule prepared by set_diamond_key()

   Round 0 is a substitution straight from x into y.  Every later round
   permutes y into a scratch buffer and substitutes the scratch buffer back
   into y.

   NOTE(review): the scratch buffer is static, i.e. shared by all calls -
   confirm callers never run this function concurrently. */
VOID FAR PASCAL _export diamond_encrypt_block(LPBYTE x,LPBYTE y,LPDIAMONDKEY dmkey)
{
static BYTE scratch[16];
WORD r;

	substitute(0,x,y,dmkey);

	r=1;
	while(r<dmkey->numrounds)
	{
		permute(y,scratch);
		substitute(r,scratch,y,dmkey);
		r++;
	}
}

/* Decrypt one 16-byte block.

   x      ciphertext block
   y      receives the plaintext block
   dmkey  key schedule prepared by set_diamond_key(); the inverse boxes (si)
          are read, so the schedule must have been built with invert set

   The last round's inverse substitution goes straight from x into y.  The
   remaining rounds are undone from the highest to round 0: y is
   inverse-permuted into a scratch buffer, which is then inverse-substituted
   back into y.

   NOTE(review): the scratch buffer is static, i.e. shared by all calls -
   confirm callers never run this function concurrently. */
VOID FAR PASCAL _export diamond_decrypt_block(LPBYTE x,LPBYTE y,LPDIAMONDKEY dmkey)
{
static BYTE scratch[16];
int r;

	isubst(dmkey->numrounds-1,x,y,dmkey);

	r=dmkey->numrounds-2;
	while(r>=0)
	{
		ipermute(y,scratch);
		isubst(r,scratch,y,dmkey);
		r--;
	}
}

/* Encrypt one block with the lite permutation, which works on 8 bytes.

   x      plaintext block
   y      receives the ciphertext block
   dmkey  key schedule prepared by set_diamond_key(); presumably one built
          with type DIAMOND_LITE so that blocksize is 8 - verify in callers

   Round 0 is a substitution straight from x into y.  Every later round
   permutes y into a scratch buffer with permute_lite() and substitutes the
   scratch buffer back into y.

   NOTE(review): the scratch buffer is static, i.e. shared by all calls -
   confirm callers never run this function concurrently. */
VOID FAR PASCAL _export lite_encrypt_block(LPBYTE x,LPBYTE y,LPDIAMONDKEY dmkey)
{
static BYTE scratch[16];
WORD r;

	substitute(0,x,y,dmkey);

	r=1;
	while(r<dmkey->numrounds)
	{
		permute_lite(y,scratch);
		substitute(r,scratch,y,dmkey);
		r++;
	}
}

/* Decrypt one 8-byte block.

   x      ciphertext block
   y      receives the plaintext block
   dmkey  key schedule prepared by set_diamond_key(); the inverse boxes (si)
          are read, so the schedule must have been built with invert set

   The last round's inverse substitution goes straight from x into y.  The
   remaining rounds are undone from the highest to round 0: y is
   inverse-permuted into a scratch buffer with ipermute_lite(), which is
   then inverse-substituted back into y.

   NOTE(review): the scratch buffer is static, i.e. shared by all calls -
   confirm callers never run this function concurrently. */
VOID FAR PASCAL _export lite_decrypt_block(LPBYTE x, LPBYTE y,LPDIAMONDKEY dmkey)
{
static BYTE scratch[8];
int r;

	isubst(dmkey->numrounds-1,x,y,dmkey);

	r=dmkey->numrounds-2;
	while(r>=0)
	{
		ipermute_lite(y,scratch);
		isubst(r,scratch,y,dmkey);
		r--;
	}
}

/* Wipe and release a key schedule.

   dmkey  structure previously passed to set_diamond_key(); a NULL pointer
          is ignored

   Each table that is present (s, si) is overwritten with zeros before it is
   freed, and every field of the structure is then cleared so that no key
   material or stale pointer is left behind. */
VOID FAR PASCAL _export diamond_done(LPDIAMONDKEY dmkey)
{
unsigned tablesize,i;

	if(dmkey==NULL)
		return;

	/* Size of each table, computed once with the same expression that was
	   used to allocate it.  The counter is unsigned because the tables can
	   be larger than a 16-bit signed int is able to count. */
	tablesize=dmkey->numrounds*dmkey->roundsize;

	if(dmkey->s)
	{
		for(i=0;i<tablesize;i++)
			dmkey->s[i]=0;
		(VOID)GlobalFreePtr(dmkey->s);
	}
	if(dmkey->si)
	{
		for(i=0;i<tablesize;i++)
			dmkey->si[i]=0;
		(VOID)GlobalFreePtr(dmkey->si);
	}

	/* erase all evidence */
	dmkey->blocksize=0;
	dmkey->roundsize=0;
	dmkey->numrounds=0;
	dmkey->s=dmkey->si=NULL;
}

/* Precompute the 256-entry lookup table used by the crc32() macro.

   Entry n is obtained by starting from n and performing eight steps; each
   step shifts the value right by one bit and, when the bit shifted out was
   set, XORs in CRC32_POLYNOMIAL. */
static VOID BuildCRCTable(VOID)
{
int entry,step;
DWORD crc;

	entry=0;
	while(entry<256)
	{
		crc=entry;
		for(step=0;step<8;step++)
		{
			if((crc & 1)==0)
				crc>>=1;
			else
				crc=(crc>>1)^CRC32_POLYNOMIAL;
		}
		Ccitt32Table[entry]=crc;
		entry++;
	}
}
