
/*****************************************************************************
                Copyright Carnegie Mellon University 1992

                      All Rights Reserved

 Permission to use, copy, modify, and distribute this software and its
 documentation for any purpose and without fee is hereby granted,
 provided that the above copyright notice appear in all copies and that
 both that copyright notice and this permission notice appear in
 supporting documentation, and that the name of CMU not be
 used in advertising or publicity pertaining to distribution of the
 software without specific, written prior permission.

 CMU DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
 CMU BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 SOFTWARE.
*****************************************************************************/

/* hash.h -- Generic Hash Table include file

$Header: hash.h,v 1.1 90/11/27 23:20:26 mwm Exp $

	Package by:  Mark Maimone 27 June 90  CMU Computer Science
	Last Modified:  27 June 90

	These files (hash.h, hash.c) implement a closed hash table with
   double hashing indexing, on key-value pairs whose size is determined at
   runtime.  Tables will grow automatically to support as many insertions
   as are made, up to the limits of available memory.  Unfortunately,
   there is no delete operator (since it's not needed for the Miro' prober
   and verifier).

	This package is very flexible: it can support any type of
   key-value pair, so long as both have a fixed maximum size.  The
   drawbacks are that very little static type checking can be done
   (everything is coerced into char *'s), and each access requires several
   dereferences (so it might be a little slow).

	Since near-full closed hash tables have greatly degraded
   performance, the size of the table will automatically be increased when
   a certain fraction of the table is full.  In this implementation, the
   table will double its size when it is half full.  See "Data Structures
   and Algorithms" by Aho, Hopcroft, and Ullman (sections 4.7, 4.8) for
   details on selecting these constants.

	The default secondary hashing scheme is somewhat stupid, in that
   it applies a fixed increment, so bunching up can still occur (see AHU
   p. 133), but it will do for now.  A better scheme is discussed in AHU,
   again on page 133.

	There are ten routines associated with this package:

	InitHashTable -- create a new hash table.  It's best to estimate
	   the number of entries in the table ahead of time, so as to avoid
	   wasting space.
	Insert -- adds a new (key,value) pair to the table
	Lookup -- returns a pointer to the key slot where the given key
	   goes, or should go.  NULL is returned on error.
	Empty -- checks to see if a key slot is empty (you can't just
	   check against a zero vector, since the "empty" key value might be
	   something else)
	ShowTable -- writes a version of the hash table to a file.  Mainly
	   useful for debugging new hash functions.
	default_str_hash -- An alternative first hash function for keys
	   that are string pointers.  The default first hash function uses
	   the bytes stored in the key to compute its index, which will not
	   work if string pointers are stored (i.e. if the bodies of the
	   strings are stored outside the table).  This function adds up
	   the bytes in the character string itself, rather than its address.
	HashTableSize -- return the number of entries in the table

		The final three functions are used within a FOR loop:

	HashEnumInit -- set up structures for enumerating elements
	HashEnumTest -- TRUE while there are still elements left to be
	   enumerated.  (key,value) pairs are returned via parameters to
	   this routine.  This should only be called *once* for each
	   (key,value) pair.
	HashEnumInc -- prepare for the next call of HashEnumTest.

int InitHashTable (h, bad_key, table_max, key_size, value_size, key_cmp,
		hash1, hash2)
	HashTableT **h	-- points to an uninitialized pointer
	char *bad_key	-- points to an unused key value
	int table_max	-- number of entries in the initial table
	int key_size	-- number of bytes required to store 1 key
	int value_size	-- number of bytes required to store 1 value
	int (*key_cmp) (keyT *key1, *key2)
			-- function that behaves like strcmp()
	int (*hash1) (char *key; int table_max, key_size, (*key_cmp)())
			-- computes first index into table.  The default
			   routine adds up bytes in the key, modulo table
			   maximum.
	int (*hash2) (key, table_max, hash1_val, probe, key_size, key_cmp)
			-- computes secondary hash offset.  The default
			   routine finds the largest number less than 10
			   that is relatively prime to the current table
			   maximum, and adds a multiple of this to the
			   current guess.
	-- returns 1 on success, 0 on failure

int Insert (h, key, value)
	HashTableT *h	-- pointer to *valid* hash table
	char *key	-- pointer to start of key in memory
	char *value	-- pointer to start of value in memory
	-- returns 1 on success, 0 on failure

char *Lookup (h, key)
	HashTableT *h	-- pointer to *valid* hash table
	char *key	-- pointer to start of key in memory
	-- returns pointer into   h   where   key   would go.  If the
	   table already has such a key, a pointer to that key is
	   returned.  Otherwise, a pointer to an empty slot is returned.
	   A NULL pointer means a fatal error occurred (full table, but
	   nothing matching   key)

int Empty (entry, bad_key, size)
	char *entry	-- pointer to (key, value) pair in the table
	char *bad_key	-- points to an unused key value
	unsigned size	-- size of the   key   field in bytes
	-- returns 1 if the   key   field represents an empty entry, 0
	   otherwise

ShowTable (h, fp, key_print, value_print)
	HashTableT *h	-- pointer to *valid* hash table
	FILE *fp	-- output file pointer
	char *(*key_print) (char *key; unsigned len)
			-- returns a string rep of   key   (which has given
			   len).  default is a hex dump of memory
	char *(*value_print) (char *value; unsigned len)
			-- returns a string rep of   value   (which has
			   given   len).  default is a hex dump
	-- Writes a representation of the hash table to the output file
	   pointer.

int default_str_hash (key, max, size, cmp)
	char *key	-- address of string pointer
	int max		-- current size of hash table
	int size	-- number of bytes in string pointer
	int (*cmp) ()	-- function that behaves like strcmp (on char **)
	-- returns index into the hash table, based on the characters
	   contained in the string

int HashTableSize (h)
	HashTableT *h	-- table to consider
	-- returns the number of entries in the table

HashEnumInit (h)
	HashTableT *h	-- table whose elements you want to enumerate
	-- sets up internal structures for iterating through the table

int HashEnumTest (h, key, value)
	HashTableT *h	-- table being enumerated
	keyT **key	-- address of storage for key pointer; if NULL, no
			   key will be returned
	valueT **value	-- address of storage for value pointer; if NULL, no
			   value will be returned
	-- returns TRUE if there are still (key,value) pairs to enumerate,
	   FALSE otherwise.  If TRUE is returned, then   key and value
	   will point to the next (key,value) pair.  There are *no*
	   guarantees on the order of the elements in the enumeration.
	   **** NOTE **** This routine performs the function of
	   HashEnumInc in the current implementation, so this routine
	   should only be called *once* for each (key,value) pair.

HashEnumInc (h)
	HashTableT *h	-- table being enumerated
	-- do any necessary adjustments to prepare for the next call to
	   HashEnumTest.  **** NOTE **** This routine does nothing in the
	   current implementation.


$Log:	hash.h,v $
 * Revision 1.1  90/11/27  23:20:26  mwm
 * Initial revision
 * 
 * Revision 1.1  90/09/11  13:36:50  mwm
 * Initial revision
 * 

*/

#ifndef HASH_MWM
#define HASH_MWM


/* Insert -- add a (key, value) pair to table h.  The key and value
   addresses are coerced to char * before being handed to Insert_hlp;
   h is passed through unchanged.  Yields whatever Insert_hlp returns. */
#define Insert(h, k, v) \
	Insert_hlp ((h), (char *) (k), (char *) (v))
/* InitHashTable -- create a new hash table by forwarding to
   InitHashTable_hlp.  The three size arguments (m, k, v) are converted to
   int and the three callbacks (kc, h1, h2) to int (*)() so that callers
   may pass any integer type and any function-pointer type; h and b are
   passed through unchanged. */
#define InitHashTable(h, b, m, k, v, kc, h1, h2) \
	InitHashTable_hlp ((h), (b), \
		(int) (m), (int) (k), (int) (v), \
		(int (*) ()) (kc), \
		(int (*) ()) (h1), \
		(int (*) ()) (h2))
/* HashEnumInit -- prepare table h for enumeration by resetting its
   iteration cursor (iter) to the start of the table storage.

   h is evaluated exactly once; it is parked in the scratch pointer
   t1_hash_ so that it can be dereferenced twice.  h must not be NULL.

   The whole replacement list is wrapped in parentheses so that the comma
   operator cannot leak into the surrounding expression (e.g. when the
   macro is used as a function argument or on the right of an assignment).
   The value of the expression is the new cursor. */
#define HashEnumInit(h) \
	((t1_hash_ = (h)), (t1_hash_ -> iter = t1_hash_ -> table))
/* HashEnumInc -- placeholder for the "increment" step of an enumeration
   loop.  It expands to nothing, so its argument is never evaluated; per
   the package description, HashEnumTest performs the advance itself in
   the current implementation. */
#define HashEnumInc(h)
/* HashTableSize -- number of entries in table h (its current_size field),
   or 0 when h is a null pointer.  h is evaluated once and parked in the
   scratch pointer t1_hash_ before being tested and dereferenced. */
#define HashTableSize(h) \
	(((t1_hash_ = (h)) != 0) ? (t1_hash_ -> current_size) : 0)

/* HashTableT -- bookkeeping record for one hash table.  Keys and values
   are opaque byte strings of fixed size; all type-specific behaviour is
   supplied through the three function pointers. */
typedef struct {
    unsigned key_size;		/* bytes needed to store one key */
    unsigned value_size;	/* bytes needed to store one value */

    /* Key comparison; behaves like strcmp().  Args: key *v1, *v2 */
    int (*key_cmp) ();

    int current_size;		/* entry count reported by HashTableSize */
    int table_max;		/* table size in entries, as given to
				   InitHashTable */

    char *table;		/* table storage; HashEnumInit starts
				   enumeration here (layout is defined
				   in hash.c -- TODO confirm) */
    char *bad_key;		/* an unused key value; marks empty slots */
    char *iter;			/* enumeration cursor, reset to table by
				   HashEnumInit */

    /* Primary hash.  Args: key *k, int table_max, key_size, (*key_cmp) () */
    int (*hash1) ();

    /* Secondary hash.  Args: key *k, int table_max, h1, key_size,
       (*key_cmp) ().  NOTE(review): the InitHashTable description in the
       file header also lists a probe argument after h1 -- confirm the
       real signature against hash.c. */
    int (*hash2) ();
} HashTableT;

/* Scratch pointer for the HashEnumInit and HashTableSize macros: each
   stores its table argument here so that the argument is evaluated only
   once and can then be dereferenced.  Because it is static, every source
   file that includes this header gets its own private copy.  Its value is
   overwritten by every use of those macros, so it must not be relied on
   between uses. */
static HashTableT *t1_hash_;		/* Used in macro processing */

/* Functions exported by the hash table package.

   These are deliberately old-style (non-prototype) declarations: the
   expected arguments are listed only in comments and are not checked by
   the compiler.  See the package description at the top of this file for
   the meaning of each argument.

   InitHashTable_hlp and Insert_hlp are normally reached through the
   InitHashTable and Insert macros, which apply the necessary casts. */

/* Create a new table; returns 1 on success, 0 on failure. */
int InitHashTable_hlp (/* HashTableT **h, char *bad_key, int table_max,
	key_size, value_size, (*key_cmp) (), (*hash1) (), (*hash2) () */);

/* Add a (key, value) pair; returns 1 on success, 0 on failure. */
int Insert_hlp (/* HashTableT *h, char *key, *value */);

/* Write a representation of the table to fp.  The return type was
   previously left implicit (and therefore int); it is now spelled out
   because implicit int is not valid in C99 and later.  The meaning of the
   returned value is not documented here. */
int ShowTable (/* HashTableT *h, FILE *fp, char *(*key_print) (),
	*(*value_print) () */);

/* Return a pointer to the slot where key is, or would go; NULL on a
   fatal error (full table with no matching key). */
char *Lookup (/* HashTableT *h, char *key */);

/* Return 1 if the key field of entry marks an empty slot, else 0. */
int Empty (/* char *entry, *bad_key, unsigned size */);

/* First-hash function for string-pointer keys: hashes the characters of
   the string rather than the bytes of the pointer. */
int default_str_hash (/* char *key, int max, size, (*cmp)() */);

/* Fetch the next (key, value) pair of an enumeration; returns TRUE while
   pairs remain.  Call only once per pair. */
int HashEnumTest (/* HashTableT *h, keyT **key, valueT **value */);

#endif
