
/* This is an independent implementation of the SAFER+ encryption   */
/* algorithm submitted by Cylink as a candidate in the US NIST      */
/* Advanced Encryption Standard (AES) programme.  This algorithm    */
/* has not been patented and is freely available for use without    */
/* the need for licensing.  This implementation has been produced   */
/* as a starting point for optimisation and is very inefficient as  */
/* it stands.                                                       */
/*                                                                  */
/* Copyright in this implementation is held by Dr B R Gladman but   */
/* I hereby give permission for its free direct or derivative use   */
/* subject to acknowledgment of its origin.                         */
/*                                                                  */
/* Dr Brian Gladman (gladman@seven77.demon.co.uk) 24th July 1998    */
/*                                                                  */

#include "../std_defs.h"

/* Name reported for this cipher implementation.                     */

static char *alg_name = "SAFER";

/* Return the algorithm name as a NUL-terminated string.  The        */
/* pointer refers to static storage and must not be freed or written */
/* through by the caller.                                            */

char *cipher_name(void)
{
    return alg_name;
}

/* Exponentiation box used by the round functions and key schedule:  */
/* expf[i] = 45^i mod 257, with the single value 256 (at i = 128)    */
/* stored as 0 so that every entry fits in one byte.                 */
/*                                                                   */
/* NOTE(review): this object has external linkage and shares its     */
/* name with the <math.h> function expf(), which is a reserved       */
/* identifier in ISO C; some compilers warn about the clash.         */

u1byte  expf[256] =
{     1,  45, 226, 147, 190,  69,  21, 174, 120,   3, 135, 164, 184,  56, 207,  63, 
      8, 103,   9, 148, 235,  38, 168, 107, 189,  24,  52,  27, 187, 191, 114, 247, 
     64,  53,  72, 156,  81,  47,  59,  85, 227, 192, 159, 216, 211, 243, 141, 177, 
    255, 167,  62, 220, 134, 119, 215, 166,  17, 251, 244, 186, 146, 145, 100, 131, 
    241,  51, 239, 218,  44, 181, 178,  43, 136, 209, 153, 203, 140, 132,  29,  20, 
    129, 151, 113, 202,  95, 163, 139,  87,  60, 130, 196,  82,  92,  28, 232, 160, 
      4, 180, 133,  74, 246,  19,  84, 182, 223,  12,  26, 142, 222, 224,  57, 252, 
     32, 155,  36,  78, 169, 152, 158, 171, 242,  96, 208, 108, 234, 250, 199, 217, 
      0, 212,  31, 110,  67, 188, 236,  83, 137, 254, 122,  93,  73, 201,  50, 194, 
    249, 154, 248, 109,  22, 219,  89, 150,  68, 233, 205, 230,  70,  66, 143,  10, 
    193, 204, 185, 101, 176, 210, 198, 172,  30,  65,  98,  41,  46,  14, 116,  80, 
      2,  90, 195,  37, 123, 138,  42,  91, 240,   6,  13,  71, 111, 112, 157, 126, 
     16, 206,  18,  39, 213,  76,  79, 214, 121,  48, 104,  54, 117, 125, 228, 237, 
    128, 106, 144,  55, 162,  94, 118, 170, 197, 127,  61, 175, 165, 229,  25,  97, 
    253,  77, 124, 183,  11, 238, 173,  75,  34, 245, 231, 115,  35,  33, 200,   5, 
    225, 102, 221, 179,  88, 105,  99,  86,  15, 161,  49, 149,  23,   7,  58,  40 
};

/* Logarithm box, the inverse permutation of expf[]:                 */
/* logf[expf[i]] == i for every byte i (so logf[1] = 0, logf[45] = 1 */
/* and logf[0] = 128, matching expf[128] = 0).                       */
/*                                                                   */
/* NOTE(review): this object has external linkage and shares its     */
/* name with the <math.h> function logf(), which is a reserved       */
/* identifier in ISO C; some compilers warn about the clash.         */

u1byte logf[256] = 
{   128,   0, 176,   9,  96, 239, 185, 253,  16,  18, 159, 228, 105, 186, 173, 248, 
    192,  56, 194, 101,  79,   6, 148, 252,  25, 222, 106,  27,  93,  78, 168, 130, 
    112, 237, 232, 236, 114, 179,  21, 195, 255, 171, 182,  71,  68,   1, 172,  37, 
    201, 250, 142,  65,  26,  33, 203, 211,  13, 110, 254,  38,  88, 218,  50,  15, 
     32, 169, 157, 132, 152,   5, 156, 187,  34, 140,  99, 231, 197, 225, 115, 198, 
    175,  36,  91, 135, 102,  39, 247,  87, 244, 150, 177, 183,  92, 139, 213,  84, 
    121, 223, 170, 246,  62, 163, 241,  17, 202, 245, 209,  23, 123, 147, 131, 188, 
    189,  82,  30, 235, 174, 204, 214,  53,   8, 200, 138, 180, 226, 205, 191, 217, 
    208,  80,  89,  63,  77,  98,  52,  10,  72, 136, 181,  86,  76,  46, 107, 158, 
    210,  61,  60,   3,  19, 251, 151,  81, 117,  74, 145, 113,  35, 190, 118,  42, 
     95, 249, 212,  85,  11, 220,  55,  49,  22, 116, 215, 119, 167, 230,   7, 219, 
    164,  47,  70, 243,  97,  69, 103, 227,  12, 162,  59,  28, 133,  24,   4,  29, 
     41, 160, 143, 178,  90, 216, 166, 126, 238, 141,  83,  75, 161, 154, 193,  14, 
    122,  73, 165,  44, 129, 196, 199,  54,  43, 127,  67, 149,  51, 242, 108, 104, 
    109, 240,   2,  40, 206, 221, 155, 234,  94, 153, 124,  20, 134, 207, 229,  66, 
    184,  64, 120,  45,  58, 233, 100,  31, 146, 144, 125,  57, 111, 224, 137,  48
};

/* Key schedule state shared by set_key(), do_fr(), do_ir(),         */
/* encrypt() and decrypt().  Being file-level globals, only one key  */
/* can be active at a time.                                          */

/* Subkeys of 16 bytes each.  set_key() writes rows 0 .. 4 * k_len;  */
/* rounds consume them in pairs and row 4 * k_len is applied by the  */
/* final output transformation.                                      */
u1byte  l_key[33][16];
/* Key length in 32-bit words as last passed to set_key().           */
u1byte  k_len;

/* Expand a user key into the subkeys held in l_key[] and record the */
/* key length in k_len.                                              */
/*                                                                   */
/* key_blk  key material; its first 4 * key_len bytes are read in    */
/*          memory order                                             */
/* key_len  key length in 32-bit words; must be 4, 6 or 8            */
/*                                                                   */
/* Returns a pointer to the key schedule (l_key viewed as u4byte *,  */
/* to be treated as opaque), or a null pointer if key_len is not     */
/* supported, in which case no state is modified.                    */

u4byte *set_key(u4byte key_blk[], u4byte key_len)
{   u1byte  by, lk[33];
    u4byte  i, j, k_bytes;

    /* lk[] holds at most 32 key bytes plus one parity byte and      */
    /* l_key[] has room for 33 subkeys: more than 8 words would      */
    /* overrun both, fewer than 4 would copy uninitialised bytes     */
    /* into l_key[0], and 5 or 7 would make encrypt()/decrypt() use  */
    /* subkeys that were never generated                             */

    if(key_len != 4 && key_len != 6 && key_len != 8)

        return (u4byte*)0;

    k_len = (u1byte)key_len; k_bytes = 4 * key_len;

    /* copy the key bytes and append their XOR as an extra byte      */

    lk[k_bytes] = 0;

    for(i = 0; i < k_bytes; ++i)
    {
        lk[i] = by = ((u1byte*)key_blk)[i];

        lk[k_bytes] ^= by;
    }

    /* the first subkey is simply the first 16 bytes of the key      */

    for(i = 0; i < 16; ++i)

        l_key[0][i] = lk[i];

    for(i = 0; i < k_bytes; ++i)
    {
        /* rotate every byte of the extended key left by 3 bits      */

        for(j = 0; j <= k_bytes; ++j)
        {
            by = lk[j]; lk[j] = by << 3 | by >> 5;
        }

        for(j = 0; j < 16; ++j)
        {
            /* bias byte: expf applied twice for the first 16        */
            /* derived subkeys, once for the remaining ones          */

            by = expf[(17 * i + j + 35) % 256];

            by = (i < 16 ? expf[by] : by);

            /* select key bytes from a window that advances by one   */
            /* position per subkey and wraps over k_bytes + 1 bytes  */

            l_key[i + 1][j] = lk[(i + j + 1) % (k_bytes + 1)] + by;
        }
    }

    /* the original fell off the end of a non-void function here     */

    return (u4byte*)l_key;
}

/* One forward round, applied in place to the 16-byte block x.       */
/*                                                                   */
/* x      block state, updated in place                              */
/* sk_no  index of the first of the two consecutive subkeys used:    */
/*        l_key[sk_no] and l_key[sk_no + 1]                          */
/*                                                                   */
/* Steps: keyed substitution through expf[]/logf[], four layers of   */
/* pairwise byte additions, then a fixed reordering of the bytes.    */

void do_fr(u1byte x[16], u4byte sk_no)
{
    /* Each row {a, b} stands for:  x[a] += x[b];  x[b] += x[a];     */
    static const u1byte mix[32][2] =
    {
        {  1,  0 }, {  3,  2 }, {  5,  4 }, {  7,  6 },
        {  9,  8 }, { 11, 10 }, { 13, 12 }, { 15, 14 },

        {  7,  0 }, {  1,  2 }, {  3,  4 }, {  5,  6 },
        { 11,  8 }, {  9, 10 }, { 15, 12 }, { 13, 14 },

        {  3,  0 }, { 15,  2 }, {  7,  4 }, {  1,  6 },
        {  5,  8 }, { 13, 10 }, { 11, 12 }, {  9, 14 },

        { 13,  0 }, {  5,  2 }, {  9,  4 }, { 11,  6 },
        { 15,  8 }, {  1, 10 }, {  3, 12 }, {  7, 14 }
    };

    /* After mixing, byte n of the result comes from position from[n] */
    static const u1byte from[16] =
    {
        14,  7,  8, 15,  0, 13,  6, 11,  4,  9,  2,  5, 10,  1, 12,  3
    };

    const u1byte *first  = l_key[sk_no];
    const u1byte *second = l_key[sk_no + 1];
    u1byte       saved[16];
    u4byte       n;

    /* Bytes 0 and 3 of every group of four: XOR key, expf, add key.  */
    /* Bytes 1 and 2 of every group of four: add key, logf, XOR key.  */
    for(n = 0; n < 16; ++n)
    {
        if((n & 3) == 0 || (n & 3) == 3)
        {
            x[n] = expf[x[n] ^ first[n]] + second[n];
        }
        else
        {
            x[n] = logf[(u1byte)(x[n] + first[n])] ^ second[n];
        }
    }

    /* Four mixing layers of eight byte pairs each, in table order.   */
    for(n = 0; n < 32; ++n)
    {
        u1byte a = mix[n][0], b = mix[n][1];

        x[a] += x[b];
        x[b] += x[a];
    }

    /* Reorder the bytes through a snapshot of the mixed state.       */
    for(n = 0; n < 16; ++n)
    {
        saved[n] = x[n];
    }

    for(n = 0; n < 16; ++n)
    {
        x[n] = saved[from[n]];
    }
}

/* One inverse round, applied in place to the 16-byte block x; it    */
/* undoes do_fr() called with the same sk_no.                        */
/*                                                                   */
/* x      block state, updated in place                              */
/* sk_no  index of the first of the two consecutive subkeys used:    */
/*        l_key[sk_no] and l_key[sk_no + 1]                          */
/*                                                                   */
/* Steps: fixed reordering of the bytes, four layers of pairwise     */
/* byte subtractions, then keyed substitution via logf[]/expf[].     */

void do_ir(u1byte x[16], u4byte sk_no)
{
    /* Byte n of the reordered state comes from position from[n]      */
    static const u1byte from[16] =
    {
         4, 13, 10, 15,  8, 11,  6,  1,  2,  9, 12,  7, 14,  5,  0,  3
    };

    /* Each row {a, b} stands for:  x[a] -= x[b];  x[b] -= x[a];     */
    static const u1byte unmix[32][2] =
    {
        { 14,  7 }, { 12,  3 }, { 10,  1 }, {  8, 15 },
        {  6, 11 }, {  4,  9 }, {  2,  5 }, {  0, 13 },

        { 14,  9 }, { 12, 11 }, { 10, 13 }, {  8,  5 },
        {  6,  1 }, {  4,  7 }, {  2, 15 }, {  0,  3 },

        { 14, 13 }, { 12, 15 }, { 10,  9 }, {  8, 11 },
        {  6,  5 }, {  4,  3 }, {  2,  1 }, {  0,  7 },

        { 14, 15 }, { 12, 13 }, { 10, 11 }, {  8,  9 },
        {  6,  7 }, {  4,  5 }, {  2,  3 }, {  0,  1 }
    };

    const u1byte *first  = l_key[sk_no];
    const u1byte *second = l_key[sk_no + 1];
    u1byte       saved[16];
    u4byte       n;

    /* Reorder the bytes through a snapshot of the incoming state.    */
    for(n = 0; n < 16; ++n)
    {
        saved[n] = x[n];
    }

    for(n = 0; n < 16; ++n)
    {
        x[n] = saved[from[n]];
    }

    /* Four un-mixing layers of eight byte pairs each, in table order */
    for(n = 0; n < 32; ++n)
    {
        u1byte a = unmix[n][0], b = unmix[n][1];

        x[a] -= x[b];
        x[b] -= x[a];
    }

    /* Bytes 0 and 3 of every group of four: subtract the second      */
    /* subkey, logf, XOR the first subkey.  Bytes 1 and 2: XOR the    */
    /* second subkey, expf, subtract the first subkey.                */
    for(n = 0; n < 16; ++n)
    {
        if((n & 3) == 0 || (n & 3) == 3)
        {
            x[n] = logf[(u1byte)(x[n] - second[n])] ^ first[n];
        }
        else
        {
            x[n] = expf[x[n] ^ second[n]] - first[n];
        }
    }
}

/* Encrypt one block using the key schedule built by set_key().      */
/*                                                                   */
/* in_blk   input block; elements 0..3 are read                      */
/* out_blk  output block; elements 0..3 are written                  */
/*                                                                   */
/* The number of rounds follows k_len: 8 when k_len <= 4, 12 when    */
/* k_len is 5 or 6, and 16 when k_len > 6.  Round r uses subkeys     */
/* l_key[2r] and l_key[2r + 1]; subkey l_key[4 * k_len] is applied   */
/* afterwards as the output transformation.                          */
/*                                                                   */
/* NOTE(review): u16byte is declared outside this file; the code     */
/* assumes it addresses four u4byte words of 4 bytes each - confirm. */

void encrypt(u16byte in_blk, u16byte out_blk)
{
    /* A union gives the byte view of the words without casting a    */
    /* byte array to u4byte *, which broke the aliasing rules and    */
    /* did not guarantee word alignment of the buffer.               */
    union { u4byte w[4]; u1byte b[16]; } blk;

    const u1byte *last  = l_key[4 * k_len];
    u4byte       rounds = (k_len > 6 ? 16 : (k_len > 4 ? 12 : 8));
    u4byte       i;

    for(i = 0; i < 4; ++i)
    {
        blk.w[i] = in_blk[i];
    }

    for(i = 0; i < rounds; ++i)
    {
        do_fr(blk.b, 2 * i);
    }

    /* Output transformation: XOR on bytes 0 and 3 of every group of */
    /* four, byte addition on bytes 1 and 2.                         */
    for(i = 0; i < 16; ++i)
    {
        if((i & 3) == 0 || (i & 3) == 3)
        {
            blk.b[i] ^= last[i];
        }
        else
        {
            blk.b[i] += last[i];
        }
    }

    for(i = 0; i < 4; ++i)
    {
        out_blk[i] = blk.w[i];
    }
}

/* Decrypt one block using the key schedule built by set_key().      */
/*                                                                   */
/* in_blk   input block; elements 0..3 are read                      */
/* out_blk  output block; elements 0..3 are written                  */
/*                                                                   */
/* Subkey l_key[4 * k_len] is removed first, then the inverse rounds */
/* run from the highest subkey pair down to pair 0.  The number of   */
/* rounds follows k_len: 8 when k_len <= 4, 12 when k_len is 5 or 6, */
/* and 16 when k_len > 6.                                            */
/*                                                                   */
/* NOTE(review): u16byte is declared outside this file; the code     */
/* assumes it addresses four u4byte words of 4 bytes each - confirm. */

void decrypt(u16byte in_blk, u16byte out_blk)
{
    /* A union gives the byte view of the words without casting a    */
    /* byte array to u4byte *, which broke the aliasing rules and    */
    /* did not guarantee word alignment of the buffer.               */
    union { u4byte w[4]; u1byte b[16]; } blk;

    const u1byte *last  = l_key[4 * k_len];
    u4byte       rounds = (k_len > 6 ? 16 : (k_len > 4 ? 12 : 8));
    u4byte       i;

    for(i = 0; i < 4; ++i)
    {
        blk.w[i] = in_blk[i];
    }

    /* Undo the output transformation: XOR on bytes 0 and 3 of every */
    /* group of four, byte subtraction on bytes 1 and 2.             */
    for(i = 0; i < 16; ++i)
    {
        if((i & 3) == 0 || (i & 3) == 3)
        {
            blk.b[i] ^= last[i];
        }
        else
        {
            blk.b[i] -= last[i];
        }
    }

    /* Inverse rounds in descending order: 2 * (rounds - 1), ..., 0  */
    for(i = rounds; i-- > 0; )
    {
        do_ir(blk.b, 2 * i);
    }

    for(i = 0; i < 4; ++i)
    {
        out_blk[i] = blk.w[i];
    }
}
