/*
  net/sched/cls_layer7.c

  Layer 7 (application layer) packet classifier.

  This program is free software.  You can redistribute and/or  modify it 
  under the terms of the GNU General Public License as published by the 
  Free Software Foundation, either version 2 of the License, or (at your 
  option) any later version.
  http://www.gnu.org/licenses/gpl.html

  Written by Matthew Strait, Ethan Sommer and Justin Levandoski, 2003.

  Modeled after:
  cls_tcindex.c:  Written 1998,1999 by Werner Almesberger, EPFL ICA

  TODO (more or less in order of priority):
  -Port to netfilter so it can be used more flexibly.
  -Allow l7-filter to change its mind as new data comes in.  Will require
  patterns to have a priority level, i.e. if "http" is priority 5 and 
  "code red" is priority 9, a packet matching both will be classified as
  "code red".
  -Better support for connections with children (FTP, etc): ability to
  classify children separately from their parents.
  -Support IPv6 (and any other desired lower level protocols)
  -Get a better regexp implementation, one that does everything grep does.

  WISH LIST:
  -Module unloading support.  When filters are turned off, the module use
  count does not decrement.  However, this is true of other filters as well,
  such as cls_u32, so changes in cls_api.c are probably needed, which is
  outside our realm.
  -Become more confident that our implementations of
  layer7_(init|walk|get|dump|put|change|delete) do everything they are supposed
  to do and nothing they aren't.  Too bad there's no documentation that says
  exactly what they are supposed to do (at least none that many hours of
  searching have found).  We haven't observed any problems with our
  implementations, but...
*/

#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <linux/if_ether.h>
#include <net/pkt_sched.h>
#include <net/route.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/proc_fs.h>
#include <linux/ctype.h> /* Needs to be last. Else isprint() might not work (?) */


/* We don't want regexp to be a separate module (unless someone asks for it
to be), so just inline it... */
#ifdef CONFIG_NET_CLS_LAYER7_MODULE
	#include "regexp/regexp.c"
#else
	#include "regexp/regexp.h"
#endif

#define LAYER7_VERSION "0.4.1"

/* See config help for info about debugging options */
#ifdef CONFIG_LAYER7_DEBUG
  #define DPRINTK(format,args...) printk(format,##args)
#else
  #define DPRINTK(format,args...)
#endif

#ifdef CONFIG_LAYER7_DEBUG_MORE
  #include <linux/time.h>
  #define DPRINTK2(format,args...) printk(format,##args)
#else
  #define DPRINTK2(format,args...)
#endif

#ifdef CONFIG_LAYER7_DEBUG_EVEN_MORE
  #define DPRINTK3(format, args...) printk(format,##args)
#else
  #define DPRINTK3(format,args...)
#endif

#define LAYER7_MAX_PATTERN_DEF_SIZE 8192
#define LAYER7_MAX_PROTOCOL_NAME_SIZE 256

/* The number of bytes to look at before giving up. */
#define LAYER7_MAX_DATA_LENGTH 1024

/* Data stored from previous packets of one connection, so we can match
 * across packets.  One element of the layer7_sessions window. */
struct layer7_session_data {
    /* payload gathered so far; layer7_really_classify() stores it lowercased
     * with NUL bytes dropped and keeps the buffer NUL-terminated */
    unsigned char data[LAYER7_MAX_DATA_LENGTH];
    /* hash of the connection that currently owns this slot; compared in
     * layer7_classify() to notice that the slot has been handed to another
     * connection */
    int hash;
    /* number of bytes currently held in data[] */
    int length;
};

/* A sliding window of the sessions we are currently working on.
 * Each new session takes the next slot; after the last slot we wrap around
 * and start overwriting the oldest ones. */
/* We use static arrays rather than dynamic allocation to prevent
any possibility that a DoS attack will run us out of memory.
See the config help for more info. */
struct layer7_session_data layer7_sessions[CONFIG_LAYER7_SESSION_WINDOW_SIZE];
/* index of the slot the next new connection will receive */
int layer7_next_session = 0;


/* Number of packets of a connection to look at before giving up on
classifying it (compared against ct_hashElem.num_pkts_so_far).
This can get modified by writes to /proc/net/layer7_numpackets */
int num_packets = 8;

/* Result attached to a filter.  layer7_change() initialises one of these
 * positionally, so the member order must not change. */
struct layer7_filter_result {
	struct tcf_police *police;	/* policer, NULL when there is none */
	struct tcf_result res;		/* class/classid pair */
};

/* Node of the chains hanging off layer7_data.h.
 * NOTE(review): nothing in the visible code creates or walks these nodes;
 * the layout appears to be inherited from cls_tcindex.c - confirm before
 * relying on it. */
struct layer7_filter {
	__u16 key;
	struct layer7_filter_result result;
	struct layer7_filter *next;	/* next node in the same chain */
};

/* Private data of one filter instance.  layer7_init() allocates it and stores
 * it in tp->root; that pointer is also what ties a filter rule to its
 * instance (the "parent" of struct layer7_patclas_pair). */
struct layer7_data {
	struct layer7_filter_result * perfect; /* perfect hash; NULL if none */
	struct layer7_filter ** h; /* imperfect hash; only used if !perfect;
				      NULL if unused */
	__u16 mask;		/* AND key with mask */
	int shift;		/* shift ANDed key to the right */
	int hash;		/* hash table size; 0 if undefined */
	int alloc_hash;		/* allocated size */
	int fall_through;	/* 0: only classify if explicit match */
};


/* One element in the classification hash table.  Each connection
 * is remembered here so that it doesn't need to be reclassified
 * for each packet once the connection has been identified. */
struct ct_hashElem {
	u32 classid;		/* class id recorded for the connection */
	u32 hash;		/* set to the slot's own index once the slot has been
				   through layer7_really_classify() */
	int num_pkts_so_far;	/* packets of this connection seen so far */
	int classified;		/* 1 once a pattern has matched, else 0 */
	int sessionid;		/* index into layer7_sessions[] */
	int datalen;		/* not referenced by the visible code */
};

/* Hash table that maps connections to the connection's state.  It is indexed
 * by layer7_hash(), which XORs 15-bit chunks and therefore yields values in
 * 0..32767. */
struct ct_hashElem currentSockets[32768];

/* A pattern defined by writing to /proc/net/layer7_protocols */
struct layer7_pattern {
	char * name; 			/* protocol name, e.g. "http" */
	regexp * pattern;		/* compiled form, as returned by regcomp() */
	int patternsize;		/* size reported by regcomp(); used as the byte
					   count when the compiled form is copied */
	unsigned char * uncomppattern; /* source text of the expression;
					   unsigned for consistent isprint use */
};

/* Pattern/classification pair (a.k.a. filter rule) */
struct layer7_patclas_pair {
#ifdef CONFIG_LAYER7_DEBUG
	char * name;		/* protocol name, kept only for debug output */
#endif
	regexp * pattern;	/* private copy of the compiled pattern */
	u32 classification;	/* class id given to matching connections */
	u32 handle;		/* filter handle; layer7_get() looks rules up by it */
	void * parent;		/* tp->root of the filter instance owning the rule */
};


/* All the rules we are currently attempting to match on, in the order in
 * which they were added. */
struct layer7_patclas_pair *layer7_patclas_pairs = NULL;

/* how many pairs we have so far */
int layer7_num_patclas_pairs = 0;

/* Array of all the patterns which have been defined
 * and an int to keep track of how many we have. */
struct layer7_pattern * layer7_patterns = NULL;
int layer7_num_patterns = 0;

/* The char* which holds the raw pattern definitions given to us
 * through the /proc filesystem. */
char * layer7_unparsed_patterns = NULL;

/* Define a new pattern (which consists of a name (eg "http") and a regular
 * expression (eg "http.*get"))
 * 
 * This is made to be memory efficent at the cost of time (it reallocates 
 * the memory each time so it only uses exactly the ammount necesary) because 
 * it will only be called one time per pattern definition. 
 *
 * The order that patterns are stored in layer7_patterns is the order that each
 * protocol was first entered. 
 */
void add_layer7_pattern(const char *name, char *pattern) 
{
	struct layer7_pattern *newpatterns=NULL;
	int x;
	/* first see if we already have a pattern by that name */
	for (x = 0; x < layer7_num_patterns; x++){
		if (!strcmp(name, layer7_patterns[x].name)) {
			/* keep a copy of the old regexp in case the new comp fails */
			regexp * oldpattern = kmalloc(layer7_patterns[x].patternsize, GFP_KERNEL);
			memcpy(oldpattern, layer7_patterns[x].pattern, layer7_patterns[x].patternsize);
	
			/* just recompile the regexp and return */
			/* compile the pattern (we only want to do this once) */
			if (!(layer7_patterns[x].pattern =
			     regcomp(pattern, &layer7_patterns[x].patternsize))) /* if regcomp fails */
			{ 
			    printk(KERN_ERR "layer7: ERROR COMPILING REGEX \"%s\"\nold regex will be kept instead\n", pattern);
			    /* go back to the old regex */
			    layer7_patterns[x].pattern = oldpattern;
			}
			else
			{			  
				kfree(layer7_patterns[x].uncomppattern);
				layer7_patterns[x].uncomppattern =
					kmalloc(strlen(pattern)+1, GFP_KERNEL);
				strcpy(layer7_patterns[x].uncomppattern, pattern);
				kfree(oldpattern);
			}

			return;
		}
	}

	/* if we have not found a pattern by that name add a new one*/

	/* allocate the memory for the new array */
	newpatterns = kmalloc( sizeof(struct layer7_pattern) * 
			 (layer7_num_patterns + 1), GFP_KERNEL);

	if (layer7_num_patterns > 0)
	{
		/* copy any previously declared patterns in */
		memcpy(newpatterns, layer7_patterns,
		       sizeof(struct layer7_pattern) * (layer7_num_patterns + 1));
		/* free the memory the old patterns were using */
		kfree(layer7_patterns);
	}
	layer7_num_patterns++;

	/* set the newpatterns to be the authoritative patterns */
	layer7_patterns = newpatterns;

	/* copy the name */
	layer7_patterns[layer7_num_patterns-1].name =
		kmalloc(strlen(name)+1, GFP_KERNEL);

	strcpy(layer7_patterns[layer7_num_patterns-1].name, name);
	/* copy the uncomp pattern */
	layer7_patterns[layer7_num_patterns-1].uncomppattern =
		kmalloc(strlen(pattern)+1, GFP_KERNEL);

	strcpy(layer7_patterns[layer7_num_patterns-1].uncomppattern, pattern);

	/* compile the pattern (we only want to do this once) */
	if (!(layer7_patterns[layer7_num_patterns-1].pattern =
	     regcomp(pattern, &layer7_patterns[layer7_num_patterns-1].patternsize))) /* if regcomp fails */
	{ 
		printk(KERN_ERR "layer7: ERROR COMPILING REGEX \"%s\"\n", pattern);
		/* make sure we don't use this regexp, 
		   if more are added they will just overwrite the bad regexp */
		layer7_num_patterns--; 
	}
}

/* Defines a new filtering rule, for example "any packet which matches
 * the pattern called http should be classified as 0x10001".
 *
 * This is made to be memory efficent at the cost of time (it reallocates the 
 * memory each time so it only uses exactly the ammount necesary) because 
 * it will only be called one time per pattern we are matching on per boot.
 *
 * The order of the filter rules in layer7_patclas_pairs is the order they 
 * were added.
 */
void add_layer7_filter_rule(const char *name, const u32 classification, const u32 handle, void * parent) 
{
	int x;
	/* loop through all the patterns */
	for (x = 0; x < layer7_num_patterns; x++) 
	{
		if (!strcmp(name, layer7_patterns[x].name)) {

			/* allocate the memory for the new array */
			struct layer7_patclas_pair * newpairs = 
				kmalloc( sizeof(struct layer7_patclas_pair) * 
					 (layer7_num_patclas_pairs + 1), GFP_KERNEL);

			/* don't copy or free things if they don't exist yet*/
			if (layer7_num_patclas_pairs > 0) {
				/* copy any previously declared patterns in */
				memcpy(newpairs, layer7_patclas_pairs,
				       sizeof(struct layer7_patclas_pair) *
				       (layer7_num_patclas_pairs+1));
				
				/* free the memory the old patterns were using */
				kfree(layer7_patclas_pairs);
			}
			layer7_num_patclas_pairs++;

			/* set the newpatterns to be the authoritative patterns */
			layer7_patclas_pairs = newpairs;	
			
			/* copy in the pattern so that if it is freed we don't crash */

			layer7_patclas_pairs[layer7_num_patclas_pairs - 1].pattern =
				kmalloc(layer7_patterns[x].patternsize, GFP_KERNEL);
			memcpy(layer7_patclas_pairs[layer7_num_patclas_pairs-1].pattern,
			       layer7_patterns[x].pattern, layer7_patterns[x].patternsize);

			#ifdef CONFIG_LAYER7_DEBUG
			layer7_patclas_pairs[layer7_num_patclas_pairs-1].name = 
				kmalloc(strlen(name)+1, GFP_KERNEL);
			strcpy(layer7_patclas_pairs[layer7_num_patclas_pairs-1].name, name); 
			#endif
			layer7_patclas_pairs[layer7_num_patclas_pairs-1].classification = 
				classification;
			layer7_patclas_pairs[layer7_num_patclas_pairs-1].handle = 
				handle;
			layer7_patclas_pairs[layer7_num_patclas_pairs-1].parent = 
				parent;
			return;
		}
	}
	printk(KERN_ERR "layer7: There is no rule for \"%s\"\n", name);
}

/* Hash a timespec down to 15 bits: both tv_nsec and tv_sec are cut into
 * 15-bit chunks (bits 0-14, 15-29 and 30-44) and all the chunks are XORed
 * together.  The result is therefore always in the range 0..32767.
 * TODO: make the chunk size user defined so that the hash table
 * can be bigger/smaller? */
static int layer7_hash(struct timespec ts) 
{
	const int chunk_bits = 15;
	const long chunk_mask = 32767;
	int folded = 0;
	int shift;

	for (shift = 0; shift <= 2 * chunk_bits; shift += chunk_bits) {
		folded ^= (int)((ts.tv_nsec >> shift) & chunk_mask);
		folded ^= (int)((ts.tv_sec >> shift) & chunk_mask);
	}

	return folded;
}

/* These functions test what kind of packet we're dealing with.
include/linux/if_ether.h suggests that all packets are treated as
Ethernet, but I'm not absolutely sure, and the presence of *raw in
skb->mac troubles me.  I depend on the IP header always starting at the
same offset, so if this is wrong, there's trouble. -MLS */

/* Returns 1 if skb->protocol says the packet is IPv4, 0 otherwise. */
static int is_ipv4(struct sk_buff * skb)
{
	/* I'm also not convinced that this code ever gets run if
	it isn't IP, since running dhclient (which should send ARPs or 
	RARPs) doesn't cause this to return false. 
	I'm not sure what other testing I can do. */

	/* the htons is important.  It fixes the endianness */
	return htons(skb->protocol) == ETH_P_IP;
}

#define IP_PROTO_OFFSET 9

/* Returns 1 if the protocol byte of the IPv4 header says TCP, 0 otherwise.
I'd rather just call this "is_tcp", except it depends on it being IPv4 and
TCP could be used on top of other protocols. */
static inline int is_tcp_over_ipv4(struct sk_buff * skb)
{
	/* I don't want to depend on skb->nh.iph->protocol being set, because
	I bet it isn't when we are acting as a switch, just like skb->h.th isn't
	when acting as a router.  So read the byte straight out of the packet. */
	return skb->data[ETH_HLEN + IP_PROTO_OFFSET] == IPPROTO_TCP;
}

/* Returns 1 if the protocol byte of the IPv4 header says UDP, 0 otherwise.
Again, I'd rather just call this "is_udp"... */
static inline int is_udp_over_ipv4(struct sk_buff * skb)
{
	return skb->data[ETH_HLEN + IP_PROTO_OFFSET] == IPPROTO_UDP;
}

/* Returns 1 if the protocol byte of the IPv4 header says ICMP, 0 otherwise. */
static inline int is_icmp_over_ipv4(struct sk_buff * skb)
{
	return skb->data[ETH_HLEN + IP_PROTO_OFFSET] == IPPROTO_ICMP;
}

/* Returns the number of bytes into skb->data at which the application
data starts: the Ethernet header, plus the IPv4 header, plus the transport
header.  This is a kludge because we don't know how to do it right,
or even if there really is a right way of doing it.
Header lengths are stored in the packet in 32 bit words, hence the
multiplications by 4. */
static int app_data_offset(struct sk_buff *skb)
{
	/* ip_hl = 4*skb->nh.iph->ihl would usually work, but I bet the
	iph struct isn't set when acting as a switch! */
	int ip_hl = 4*(skb->data[ETH_HLEN] & 0x0f);

	/* The test must CALL the helper.  Naming the function without
	arguments yields its address, which is never zero, and would send
	every packet down the TCP branch. */
	if( is_tcp_over_ipv4(skb) )
	{
		/* 12 == offset into TCP header for the header length field. We can't get this
		with skb->h.th->doff because the tcphdr struct doesn't get set when routing */
		int tcp_hl = 4*(skb->data[ETH_HLEN + ip_hl + 12] >> 4);
		return ETH_HLEN + ip_hl + tcp_hl;
	}
	else if( is_udp_over_ipv4(skb) )
		return ETH_HLEN + ip_hl + 8; /* UDP header is always 8 bytes */
	else if( is_icmp_over_ipv4(skb) )
		return ETH_HLEN + ip_hl + 8; /* ICMP header is 8 bytes */
	else
	{
		printk(KERN_ERR "layer7: tried to handle unknown protocol!\n");
		return ETH_HLEN + ip_hl + 8; /* something reasonable */
	}
}

/* This function is only called until the connection is classified or for the 
 * first few packets (whichever limit comes first.) The classification happens 
 * here. After a connection has been identified it continues to be of that 
 * type. */
static int layer7_really_classify(struct sk_buff *skb, struct tcf_result *res, int hash, void* parent)
{
        int x = 0;
        int match = 0; 
        int sid = currentSockets[hash].sessionid;

        /* the application layer data */
        unsigned char * app_data = skb->data + app_data_offset(skb);  

	int datalen = (int)skb->tail - (int)app_data ;

	/* If there's no new data, there's no way this is useful */
	if(datalen == 0) 
	{
		DPRINTK2("layer7: empty packet\n");
		goto fail;
	}
	/* If we can't fit the new data in, there's no way it's useful either */
	if(layer7_sessions[sid].length >= LAYER7_MAX_DATA_LENGTH - 2) 
	{
		DPRINTK2("layer7: no room left for packet\n");
		goto fail;
	}

        /* this looks slow, but changing it to a memcpy (which loses the ability to
        strip out nulls and do tolower) does not make a noticable difference in speed,
        so we suspect that this is not a bottleneck. */      
        for(x = 0; x < datalen; x++)
        {
          /* it is -2 to leave space for the null (-2, not -1?)*/
          if (layer7_sessions[sid].length < LAYER7_MAX_DATA_LENGTH - 2) 
          { 
                if (app_data[x] != 0)
                { 
                        layer7_sessions[sid].data[layer7_sessions[sid].length] = tolower(app_data[x]);
                        layer7_sessions[sid].length++;
                }
          }     
        }       
        /* make it into a null-terminated string */
        layer7_sessions[sid].data[layer7_sessions[sid].length] = '\0'; 

#ifdef CONFIG_LAYER7_DEBUG
        if (strlen(layer7_sessions[sid].data) != 0) {
          printk("layer7: data so far (%d packet(s), %d bytes): ('.' = non-printable)\n",
                currentSockets[hash].num_pkts_so_far, strlen(layer7_sessions[sid].data));
          for (x = 0; x < strlen(layer7_sessions[sid].data); x++){
            if (isprint(layer7_sessions[sid].data[x]) && layer7_sessions[sid].data[x] < 128) 
                printk("%c",layer7_sessions[sid].data[x]);
            else                 
                printk(".");
          }
          printk("\n");
        }
        else
          printk("layer7: stream 0x%x packet %d, no data yet\n", hash, currentSockets[hash].num_pkts_so_far);
#endif

	DPRINTK2("layer7: checking for matches:\n");

        /* loop through all the patclas pairs to see if we can match it */
        for (x = 0; x < layer7_num_patclas_pairs; x++)
        {
		#ifdef CONFIG_LAYER7_DEBUG_MORE
			long int ns;
			struct timespec l7time1 /* = CURRENT_TIME */, l7time2;
			jiffies_to_timespec(jiffies,&l7time1);
		#endif

                match = (layer7_patclas_pairs[x].parent == parent) && 
                        regexec(layer7_patclas_pairs[x].pattern, layer7_sessions[sid].data);
		#ifdef CONFIG_LAYER7_DEBUG_MORE
			jiffies_to_timespec(jiffies,&l7time2);
			/* l7time2 = CURRENT_TIME; */ /* 2.6 */
			ns = 1000000000*(int)(l7time2.tv_sec - l7time1.tv_sec) + 
			l7time2.tv_nsec - l7time1.tv_nsec;	
			if(ns != 0)
				printk("%s took %ld nanoseconds (%ld ms)\n", 
				layer7_patclas_pairs[x].name, ns, ns/1000000);
		#endif
                if (match) 
                {
                        DPRINTK("layer7: stream 0x%x: found match: %s\n", 
                                hash, layer7_patclas_pairs[x].name);
                        break;
                }
        }

	DPRINTK2("\nlayer7: done checking for matches\n");

        if(match)
        {
                /* classify it */
                res->classid = layer7_patclas_pairs[x].classification; 

                /* we are a "generic filter", so class is always set to 0.  
                See "Linux Network Traffic Control -- Implementation Overview",
                4 Feb 2001, section 5.3 */
                res->class = 0; 

                /* record how we classified it */
                currentSockets[hash].classid = layer7_patclas_pairs[x].classification; 
                currentSockets[hash].hash = hash;
                currentSockets[hash].classified = 1;
                return TC_POLICE_OK;
        }

	/* Didn't try to match (no new data), or tried to match + found none */
	fail:

	DPRINTK("layer7: stream 0x%x: no match.\n", hash);

        res->class = 0;
       
        /* remember to use the default in the futrure */
        currentSockets[hash].classid=res->classid; 
        currentSockets[hash].hash = hash;

        /* this is the "unclassified" case, so leave
        currentSockets[hash].classified alone */
        return TC_POLICE_UNSPEC;
}

/* Classifier entry point (the .classify member of cls_layer7_ops).
 *
 * Only IPv4 packets carrying one of the transport protocols enabled at
 * configuration time are looked at.  A connection is identified by a
 * timestamp which this function stores in its conntrack entry (the top-most
 * master, for related connections) the first time the connection is seen;
 * layer7_hash() of that timestamp is the connection's index in
 * currentSockets[].
 *
 * skb: the packet.
 * tp:  the filter instance; tp->root selects which rules apply.
 * res: receives the classification.
 *
 * Returns TC_POLICE_OK with res->classid set when the connection is, or has
 * just been, classified and TC_POLICE_UNSPEC in every other case. */
static int layer7_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
{
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack *conntrack;
	int hash;

	/* check if we can deal with the protocol */
	if( is_ipv4(skb) )
	{
		DPRINTK3("layer7: Is IPv4, going on.\n");	

		/* The empty if(0) gives each optional "else if" below something
		to attach to, whichever of them are compiled in. */
		if(0); /* ugly, makes the sifting below work */
		#ifdef CONFIG_LAYER7_IPV4_UDP
		else if ( is_udp_over_ipv4(skb))
			DPRINTK3("    layer7: Is UDP/IPv4, going on.\n"); 
		#endif
		#ifdef CONFIG_LAYER7_IPV4_TCP
		else if( is_tcp_over_ipv4(skb))
			DPRINTK3("    layer7: Is TCP/IPv4, going on.\n");
		#endif
		#ifdef CONFIG_LAYER7_IPV4_ICMP
		else if( is_icmp_over_ipv4(skb))
			DPRINTK3("    layer7: Is ICMP/IPv4, going on.\n");
		#endif
		else
		{
			DPRINTK3("    layer7: Not any supported protocol, giving up.\n");
			return TC_POLICE_UNSPEC;
		}
	}
	else{
		DPRINTK2("layer7: Not IPv4, giving up.\n");
		return TC_POLICE_UNSPEC;
	}

	/* get a ip_conntrack */
	if(!(conntrack = ip_conntrack_get(skb, &ctinfo)))
	{
		printk(KERN_ERR "layer7: layer7_classify: error getting conntrack, giving up.\n");
		return TC_POLICE_UNSPEC;
	}

	/* see if we can get a master conntrack (and its master etc) 
	   (for ftp etc) */
	while (master_ct(conntrack) != NULL)  {
		conntrack = master_ct(conntrack);
	}

	/* the conntrack got bzeroed somewhere, so that should be 0
	   the first time around... */
	if (conntrack->timestamp.tv_sec == 0){
		/*conntrack->timestamp = CURRENT_TIME;*/ /* 2.4/2.6 difference */
                jiffies_to_timespec(jiffies,&conntrack->timestamp); /* 2.4/2.6 difference */

		/* start from a clean table entry for the new connection */
		hash = layer7_hash(conntrack->timestamp);
		memset(&currentSockets[hash], 0, sizeof(struct ct_hashElem));

                DPRINTK2("layer7: Net session 0x%x put in slot %d\n", hash, layer7_next_session);
               
		/* hand out the next slot of the session window, wrapping
		   around at its end */
                currentSockets[hash].sessionid = layer7_next_session++;
                if (layer7_next_session == CONFIG_LAYER7_SESSION_WINDOW_SIZE) 
                {
                    layer7_next_session = 0;
                    DPRINTK2("layer7: next session wrapped");
                }
                layer7_sessions[currentSockets[hash].sessionid].length = 0;

		/* used to check for overwritten sessions being accessed */
		layer7_sessions[currentSockets[hash].sessionid].hash = hash;
	}

	/* we hash on the timestamp we added to the conntrack */
	hash = layer7_hash(conntrack->timestamp);

	/* If we already know about this connection, this increments the 
	   packet count. If not, this doesn't hurt anything. */
	currentSockets[hash].num_pkts_so_far++;

        DPRINTK3("currentSockets[%x].num_pkts_so_far=%d\n",hash,currentSockets[hash].num_pkts_so_far);

        /* If we've seen this connection before and we're not trying to 
	   classify it anymore, either because we've given up or because 
	   we've found a match */
	if ( currentSockets[hash].hash == hash && 
	     (currentSockets[hash].num_pkts_so_far > num_packets ||
		currentSockets[hash].classified) )
	{
                DPRINTK3("layer7: using old classification for packet %d of stream 0x%x\n",
                        currentSockets[hash].num_pkts_so_far, hash);   

                if(currentSockets[hash].classified)
                {
                        /* classify it as what we classified it as before */
                        res->classid = currentSockets[hash].classid;
                        res->class = 0;
                        return TC_POLICE_OK;
                }
                else
                        return TC_POLICE_UNSPEC;
	}
	/* if we've seen it before, but we still need to classify it */
	else if(currentSockets[hash].hash == hash)
        {
		int retval;

		/* This happens if the following occurs: (1) a connection is opened, it is
		not classified, and has not had more than num_packets packets. (2) at least
		CONFIG_LAYER7_SESSION_WINDOW_SIZE more connections are opened, such that one 
		starts using the same slot in layer7_sessions that the first connection
		is/was using. (3) The first connection receives another packet. 
		In this case, we simply give up on that first connection, as its old
		data has been overwritten. 
		With this system, the worst a DoS attack should be able to do is force
		all the legit connections into the default class.  */
		if(layer7_sessions[currentSockets[hash].sessionid].hash != hash)
		{
			printk(KERN_WARNING "layer7: overwritten session accessed!  If you get "
			"this message frequently, please refer to the layer 7 documentation.\n");
			retval = TC_POLICE_UNSPEC;
			
			/* never check again */
			currentSockets[hash].num_pkts_so_far = num_packets + 1; 

			return retval;
		}
		else
                	retval = layer7_really_classify(skb, res, hash,tp->root);

                if(retval == TC_POLICE_UNSPEC)
		   DPRINTK2("layer7: stream 0x%x packet #%d, still unmatched.\n", 
                       hash, currentSockets[hash].num_pkts_so_far);
		else
                    DPRINTK2("layer7: stream 0x%x packet #%d, found match. Classified as %x\n", 
                        hash, currentSockets[hash].num_pkts_so_far, currentSockets[hash].classid);

                return retval;
	}
	/* otherwise this is the first packet of a new connection */
	else{
		int retval;
	
		currentSockets[hash].num_pkts_so_far = 1;
		currentSockets[hash].classified = 0;

		retval = layer7_really_classify(skb, res, hash,tp->root);
		if(retval == TC_POLICE_UNSPEC)
                   DPRINTK2("layer7: stream 0x%x packet #1, still unmatched.\n", hash);
		else
 		   DPRINTK2("layer7: stream 0x%x packet #1. Classified as %x\n", 
                       hash, currentSockets[hash].classid);
		return retval;
	}

	/* not reached: every branch above returns */
	return TC_POLICE_OK; /* == 0 */
}

/* Returns the "internal id" of the rule corresponding to handle, i.e. the
   index of the first rule in layer7_patclas_pairs carrying that handle.
   When no rule has the handle, layer7_num_patclas_pairs (one past the last
   valid index) is returned.  Untested! */
static unsigned long layer7_get(struct tcf_proto *tp, u32 handle)
{
	int idx = 0;

	/* step over every rule that has some other handle */
	while (idx < layer7_num_patclas_pairs &&
	       layer7_patclas_pairs[idx].handle != handle)
		idx++;

	return idx;
}


/* Required member of cls_layer7_ops.  It only logs that it was called (when
that level of debugging is compiled in); tp and f are not used.
This doesn't do anything in _any_ of the other filters either. */
static void layer7_put(struct tcf_proto *tp, unsigned long f)
{
	DPRINTK2("layer7_put called.  Doing nothing.\n");
}

/* Sets up the private data of a new filter instance (derived from
cls_tcindex.c): allocates a struct layer7_data, stores it in tp->root and
takes a module reference.  tc crashes if tp->root isn't set.

Returns 0 on success, -ENOMEM if the allocation fails (the module reference
is dropped again in that case). */
static int layer7_init(struct tcf_proto *tp)
{
	struct layer7_data *p;

	DPRINTK2("layer7_init called:  Might not be doing the right thing.\n");
	
        MOD_INC_USE_COUNT; /* not in 2.6 */
	p = kmalloc(sizeof(struct layer7_data), GFP_KERNEL);
	if (!p) {
		MOD_DEC_USE_COUNT; /* not in 2.6 */
		return -ENOMEM;
	}

	/* kmalloc() does not clear memory.  Zero the whole structure so that
	members not assigned below (alloc_hash) don't hold garbage. */
	memset(p, 0, sizeof(*p));
	p->perfect = NULL;
	p->h = NULL;
	p->hash = 0;
	p->mask = 0xffff;
	p->shift = 0;
	p->fall_through = 1;

	/* publish the structure only once it is fully set up */
	tp->root = p;

	return 0;
}

/* XXX More info needed here. 
We're not sure exactly what this is supposed to do.  We're copying what 
cls_tcindex.c does and nothing appears to be broken because of this approach. */
static int layer7_delete(struct tcf_proto *tp, unsigned long arg)
{
	struct layer7_filter_result *r = (struct layer7_filter_result *) arg;
	unsigned long cl;

	DPRINTK2("layer7_delete called: might not be doing the right thing.\n");

	cl = __cls_set_class(&r->res.class,0);
	if (cl)
		tp->q->ops->cl_ops->unbind_tcf(tp->q,cl);

#ifdef NET_CLS_POLICE
	tcf_police_release(r->police);
#endif
	return 0;
}


/* Adds a filter rule from the netlink attributes in tca.
There are no parameters for layer7_init, so we overload layer7_change.

tp:     the filter instance; tp->root becomes the parent of the new rule.
handle: filter handle recorded with the rule.
tca:    attributes of the request; TCA_OPTIONS carries the nested
        TCA_LAYER7_* attributes.  TCA_LAYER7_PROTOCOL (the pattern name) is
        required, TCA_LAYER7_CLASSID is optional and defaults to 0.
base and arg are not used.

Returns 0 on success (also when there are no options at all), -EINVAL when
the attributes are malformed or the protocol name is missing. */
static int layer7_change(struct tcf_proto * tp, unsigned long base, u32 handle,
    struct rtattr ** tca, unsigned long * arg)
{
	struct rtattr * opt = tca[TCA_OPTIONS-1];
	struct rtattr * tb[TCA_LAYER7_MAX];
	/* private, always NUL-terminated copy of the protocol name */
	char protocol[LAYER7_MAX_PROTOCOL_NAME_SIZE];
	unsigned int protolen;
	u32 classid = 0;


	if (!opt)
		return 0;
	if(rtattr_parse(tb, TCA_LAYER7_MAX,RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
		return -EINVAL;

	/* Get protocol here.  A rule without a pattern name is meaningless,
	and passing a NULL name on would end in strcmp(NULL, ...). */
	if (!tb[TCA_LAYER7_PROTOCOL - 1])
		return -EINVAL;

	protolen = RTA_PAYLOAD(tb[TCA_LAYER7_PROTOCOL - 1]);
	if (protolen < sizeof(int))
		return -EINVAL;
	/* Pattern names are read into a buffer of this same size, so a longer
	name could never match one anyway. */
	if (protolen >= sizeof(protocol))
		return -EINVAL;

	/* The attribute payload is not guaranteed to end in a NUL byte, so
	don't use it as a C string in place. */
	memcpy(protocol, RTA_DATA(tb[TCA_LAYER7_PROTOCOL - 1]), protolen);
	protocol[protolen] = '\0';

	if (tb[TCA_LAYER7_CLASSID-1]) {
		if (RTA_PAYLOAD(tb[TCA_LAYER7_CLASSID - 1]) < sizeof(__u32))
			return -EINVAL;
		classid = *(__u32 *) RTA_DATA(tb[TCA_LAYER7_CLASSID - 1]);
        }

	DPRINTK2("add_layer7_filter_rule, protocol: %s, with classid: %x, handle %u\n", protocol, classid, handle);
	add_layer7_filter_rule(protocol, classid, handle,tp->root);

#ifdef NET_CLS_POLICE
	{
		struct layer7_filter_result new_filter_result = {
			NULL,		/* no policing */
			{ 0,0 },	/* no classification */
		};
		struct layer7_filter_result * r = &new_filter_result;
		struct tcf_police *police;

		/* NOTE(review): the located policer ends up in a structure
		that lives on this function's stack and is gone on return, so it
		is neither applied nor released.  Left as it was; it needs a
		per-rule home to be fixed properly. */
		police = tb[TCA_LAYER7_POLICE - 1] ?
		    tcf_police_locate(tb[TCA_LAYER7_POLICE - 1], NULL) : NULL;
		tcf_tree_lock(tp);
		police = xchg(&r->police, police);
		tcf_tree_unlock(tp);
		tcf_police_release(police);
	}
#endif
	return 0;
}

/* XXX More information needed here.
Member of cls_layer7_ops that is not implemented: it only logs that it was
called (when that level of debugging is compiled in) and never touches tp or
walker.  While the other filters do (cryptic) things here, we haven't
observed anything bad happening as a result of ours doing nothing... */
static void layer7_walk(struct tcf_proto * tp, struct tcf_walker * walker)
{
	DPRINTK2("layer7_walk called.  Not implemented.\n");
}

/* delete all the rules in the filter */
static void layer7_destroy(struct tcf_proto *tp)
{
	int x;

	/* clear the filter rules */
	if (layer7_patclas_pairs != NULL) {
		for (x=0;x<layer7_num_patclas_pairs;x++) 
		{
			kfree(layer7_patclas_pairs[x].pattern);
		}
		kfree(layer7_patclas_pairs);
		layer7_patclas_pairs=NULL;
		layer7_num_patclas_pairs=0;
	}
}


/* XXX more information needed here.
Member of cls_layer7_ops that gets called each time a filter is added.  It
writes nothing into skb and simply reports skb's current length, which is
what cls_tcindex.c does on success.  We can't find any documentation that
defines what it is supposed to do or why it gets called when it does, but
nothing seems to be broken because of our current approach. */
static int layer7_dump(struct tcf_proto *tp, unsigned long fh,
    struct sk_buff *skb, struct tcmsg *t)
{
	int reported_len = skb->len;

	DPRINTK2("layer7_dump called.  Might not be doing the right thing.\n");

	return reported_len;
}

/* The operations table of the "layer7" classifier. */
struct tcf_proto_ops cls_layer7_ops = {
	.kind     = "layer7",
	.next     = NULL,

	/* per-packet entry point */
	.classify = layer7_classify,

	/* lifetime of a filter instance */
	.init     = layer7_init,
	.destroy  = layer7_destroy,

	/* handling of individual rules */
	.get      = layer7_get,
	.put      = layer7_put,
	.change   = layer7_change,
	.delete   = layer7_delete,

	/* enumeration and reporting */
	.walk     = layer7_walk,
	.dump     = layer7_dump,
/*	.owner = THIS_MODULE */
};

/* Converts one hexadecimal digit (ASCII, either case) to its value 0..15.
 * Any other character is logged as an error and converted to 0. */
int hex2dec(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	printk(KERN_ERR "layer7: hex2dec: bad value!\n");
	return 0;
}


/* Converts a number in the range 0..15 to the matching hexadecimal digit
 * (ASCII, lower case).  Any other number is logged as an error and converted
 * to the NUL character. */
char dec2hex(int n)
{
	if (n >= 0 && n <= 9)
		return (char)n + '0';

	if (n >= 10 && n <= 15)
		return (char)n - 10 + 'a';

	printk(KERN_ERR "layer7: dec2hex: bad value!\n");
	return 0;
}

/* Helper of layer7_read_proc(): accounts for one byte of the generated
 * listing.  The byte is stored in page only if its position lies inside the
 * window [off, off + count) asked for by the reader; the position advances
 * either way. */
static inline void layer7_proc_emit(char *page, off_t off, int count,
				    off_t *pos, int *stored, char c)
{
	if (*pos >= off && *stored < count)
		page[(*stored)++] = c;
	(*pos)++;
}

/* Write out the patterns to userland. (yes, write reads and read writes.)
 *
 * One line is produced per pattern: the name, a tab, the source text of the
 * regular expression and a newline.  Bytes of the expression that are not
 * printable ASCII are shown as \xNN, so the output is not necessarily in the
 * exact format that was written in, but it is all printable characters.
 *
 * The whole listing is generated on every call, but only the part starting
 * at offset off, and at most count bytes of it, is stored in page.  *start is
 * set to page to tell the caller that the data for the requested offset
 * begins right there.  This keeps the function inside the buffer however
 * long the listing is (a single pattern may be up to
 * LAYER7_MAX_PATTERN_DEF_SIZE bytes long).
 *
 * Returns the number of bytes stored in page and sets *eof once the end of
 * the listing has been reached.  data is not used.
 */
int layer7_read_proc(char* page, char ** start, off_t off, int count, 
		     int* eof, void * data) 
{
	off_t pos = 0;		/* position in the complete listing */
	int stored = 0;		/* bytes placed into page so far */
	int x;

	if (layer7_patterns == NULL){
		/* there are no patterns yet */
		*eof=1;
		return 0;
	}

	/* there are patterns */
	for (x=0;x<layer7_num_patterns;x++){
		const char *name = layer7_patterns[x].name;
		const unsigned char *re = layer7_patterns[x].uncomppattern;

		for (; *name != '\0'; name++)
			layer7_proc_emit(page, off, count, &pos, &stored, *name);
		layer7_proc_emit(page, off, count, &pos, &stored, '\t');

		for (; *re != '\0'; re++)
		{
			if(!isprint(*re) || *re > 127)
			{
				/* the digits go straight into the output; string
				literals must never be used as scratch space */
				layer7_proc_emit(page, off, count, &pos, &stored, '\\');
				layer7_proc_emit(page, off, count, &pos, &stored, 'x');
				layer7_proc_emit(page, off, count, &pos, &stored,
						 dec2hex(*re / 16));
				layer7_proc_emit(page, off, count, &pos, &stored,
						 dec2hex(*re % 16));
			}
			else
				layer7_proc_emit(page, off, count, &pos, &stored, *re);
		}
		layer7_proc_emit(page, off, count, &pos, &stored, '\n');
	}

	*start = page;
	/* pos is now the length of the complete listing */
	if (pos <= off + count)
		*eof=1;
	return stored;
}

/* Read in the protocols from userland */
int layer7_write_proc(struct file* file, const char* buffer, 
		      unsigned long count, void *data) 
{
	int x = 0, y, i;
	char *patterns;
	char *name;
	char *pattern;
	
	/* free the old pattens if they exist */
	if (layer7_unparsed_patterns != NULL)
		kfree(layer7_unparsed_patterns);

	/* allocate space for the new ones */
	layer7_unparsed_patterns=(char *)kmalloc(count + 1, GFP_KERNEL);
	/* bail if it fails */
	if (!layer7_unparsed_patterns)
		return 0;

	/* copy in the data from userland */
	copy_from_user(layer7_unparsed_patterns, buffer, count);
	layer7_unparsed_patterns[count]= '\0';

	/* double enforce case insensitivity.  
	We also call tolower on the packets */
	for(i = 0; i < count; i++)
	    layer7_unparsed_patterns[i] = tolower(layer7_unparsed_patterns[i]);

	/* add the patterns to the pattern definitions table */
	pattern = (char*)kmalloc(LAYER7_MAX_PATTERN_DEF_SIZE, GFP_KERNEL);
	patterns = layer7_unparsed_patterns;
	while (x < count) 
	{
		name = (char*)kmalloc(LAYER7_MAX_PROTOCOL_NAME_SIZE, GFP_KERNEL);
		name[0] = '\0';
		pattern[0] = '\0';
		/* read past comment lines */
		while (x < count && (patterns[x]=='#' || isspace(patterns[x]))){
			if (patterns[x] == '#'){
				while (x<count && patterns[x]!='\n'){
				  x++;
				}
			}
			x++;
		}
		
		/* read in the name */
		y = 0;
		while (x < count && !isspace(patterns[x]) &&
		       y < LAYER7_MAX_PROTOCOL_NAME_SIZE - 1){
			name[y] = patterns[x];
			x++; 
			y++;
		}
		name[y] = '\0';
		
		/* skip over comments and blank lines */
		while (x < count && (patterns[x] == '#' || isspace(patterns[x])))
		{
			if (patterns[x] == '#')
				while (x < count && patterns[x] != '\n') 
					x++;
			x++;
		}
		
		/* read in the pattern.  Do some preprocessing so that we can support
		perl-style matches of non-printable characters, even though our regexp
		implementation doesn't.  This means that what is catted out of 
		/proc/layer7_protocols won't be quite what was catted in... 
		Crufty, but acceptable, I think. -MLS */
		y = 0;
 		while (x < count && patterns[x] != '\n' && patterns[x] != '\r' &&
        		y < LAYER7_MAX_PATTERN_DEF_SIZE-1)
		{
			/* handles \xHH notation */
            		if (x+3 < count &&
				patterns[x] == '\\' && patterns[x+1] == 'x' && 
                		isxdigit(patterns[x+2]) && isxdigit(patterns[x+3])) 
			{
				/* carefully remember to call tolower yet again... */
               			pattern[y] = tolower(hex2dec(patterns[x+2])*16 +
                          		hex2dec(patterns[x+3]));
				x+=3; /* 4 total */
            		}
            		else 
               			pattern[y]=patterns[x];

           		x++; y++;
	       	}

		pattern[y] = '\0';
		/* if we now have both a pattern and name add it */
		if (strlen(name) != 0 && strlen(pattern) != 0)
		{
			DPRINTK("layer 7: Adding protocol:pattern :: %s:%s\n", name, pattern);
			add_layer7_pattern(name, pattern);
		}
	}

	return count;
}


/* Parse the run of decimal digits at the start of s (modelled on the helper
   in drivers/video/modedb.c).

   Parsing stops at the first non-digit, so no sign and no leading
   whitespace are accepted; a string that does not start with a digit
   yields 0.  Values too large for an int saturate at INT_MAX rather than
   overflowing, which would be undefined behaviour for a signed int. */
int my_atoi(const char *s)
{
	int val = 0;

	for (; *s >= '0' && *s <= '9'; s++) {
		int digit = *s - '0';

		/* would 10*val + digit exceed INT_MAX? */
		if (val > (INT_MAX - digit) / 10)
			return INT_MAX;

		val = 10 * val + digit;
	}

	return val;
}

/* Report num_packets to userland as exactly two decimal digits followed by
   a newline.  Sets *eof and always returns 3, the number of characters
   written before the terminating NUL.  A value above 99 does not fit this
   format and is only logged. */
int layer7_read_proc_np(char* page, char ** start, off_t off, int count, 
		     int* eof, void * data) 
{
	char *out = page;

	if (99 < num_packets)
		printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n");

	*out++ = '0' + num_packets / 10;	/* tens */
	*out++ = '0' + num_packets % 10;	/* ones */
	*out++ = '\n';
	*out = '\0';

	*eof = 1;

	return 3;
}

/* Read in num_packets from userland */
int layer7_write_proc_np(struct file* file, const char* buffer, 
		      unsigned long count, void *data) 
{
	char * foo = (char *) kmalloc(3*sizeof(char), GFP_KERNEL);

	/* copy in the data from userland */
	copy_from_user(foo, buffer, count);

	num_packets = my_atoi(foo);

	/* This is totally arbitrary in order to make the math easier.
	   I'm lazy.  But anyway, 99 is a LOT! */
	if(num_packets > 99)
	{
		printk(KERN_WARNING "layer7: num_packets can't be > 99. Setting to 99.\n");
		num_packets = 99;
	}
	else if(num_packets < 1)
	{
		printk(KERN_WARNING "layer7: num_packets can't be < 1. Setting to 1.\n");
		num_packets = 1;
	}

	return count;
}

/* Remove both of this module's entries from under proc_net. */
void layer7_cleanup_proc(void)
{
	static const char * const entries[] = {
		"layer7_protocols",
		"layer7_numpackets",
	};
	unsigned int i;

	for (i = 0; i < sizeof(entries) / sizeof(entries[0]); i++)
		remove_proc_entry(entries[i], proc_net);
}

/* register the proc file */
void layer7_init_proc(void)
{
	{
		struct proc_dir_entry* entry;

		/* create the file */
		entry = create_proc_entry("layer7_protocols", 0644, proc_net);

		/* set the callback functions */
		entry->read_proc = layer7_read_proc;
		entry->write_proc = layer7_write_proc;
	}

	{
		struct proc_dir_entry* entry;

		entry = create_proc_entry("layer7_numpackets", 0644, proc_net);

		entry->read_proc = layer7_read_proc_np;
		entry->write_proc = layer7_write_proc_np;
	}
}

#ifdef MODULE
/* Module load hook: announce the version and the protocols compiled in,
   create the proc files and register the classifier.

   Returns the result of register_tcf_proto_ops().  If registration fails
   the proc files are removed again, so that a rejected module load does not
   leave entries behind whose handlers point into this module. */
int init_module(void)
{
	int err;

	printk(	"layer7: layer7 module (version %s) starting. "
		"Supported protocols are:\n" 
	#ifdef CONFIG_LAYER7_IPV4_TCP
	"layer7: TCP over IPv4\n"
	#endif
	#ifdef CONFIG_LAYER7_IPV4_UDP
	"layer7: UDP over IPv4\n"
	#endif
	#ifdef CONFIG_LAYER7_IPV4_ICMP
	"layer7: ICMP over IPv4\n"
	#endif
		, LAYER7_VERSION);
	
	DPRINTK ("layer7: debugging on.\n");
	DPRINTK2("layer7: more debugging on.\n");
	DPRINTK3("layer7: even more debugging on.\n");

	layer7_init_proc();

	err = register_tcf_proto_ops(&cls_layer7_ops);
	if (err)
		layer7_cleanup_proc();

	return err;
}

/* Module unload hook: undo what init_module() set up. */
void cleanup_module(void) 
{
	DPRINTK("layer7: cleanup_module called\n");
	/* remove the layer7_protocols and layer7_numpackets proc files */
	layer7_cleanup_proc();
	/* take the classifier out of the list of registered tcf_proto_ops */
	unregister_tcf_proto_ops(&cls_layer7_ops);
}
MODULE_LICENSE("GPL");
#endif

