/* -*- linux-c -*- */

/*
 *   Copyright (c) International Business Machines  Corp., 2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * linux/drivers/md/dm-sparse.c
 *
 * Sparse target for device-mapper.
 *
 * This target provides the ability to create a sparse device. This 
 * allows a device to pretend to be larger than it really is.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/mempool.h>
#include <linux/vmalloc.h>

#include "dm.h"
#include "dm-io.h"

/*
 * Intended average hash chain length: the constructor sizes the hash table
 * as chunks / MAX_HASH_CHAIN_ENTRIES + 1 buckets.
 */
#define MAX_HASH_CHAIN_ENTRIES 10
/*
 * Length limit used when formatting the per-volume slab cache name; the
 * buffer itself is declared with NAME_SIZE + 1 bytes.
 */
#define NAME_SIZE 127

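/*
 * Sparse ioctl / table arguments, in the order the constructor parses them:
 *   device      - the underlying device
 *   start       - LBA of the first chunk on that device
 *   chunk_size  - size of one chunk, in sectors
 *   chunks      - number of chunks
 */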

/*
 * One remapping in the sparse map: the chunk as addressed by the user of the
 * sparse device (org_chunk) and the chunk it has been assigned on the backing
 * device (sparse_chunk). Entries are linked into a doubly linked hash chain
 * that is kept sorted by ascending org_chunk; entries sitting on the free
 * list are chained through 'next' only.
 */
struct sparse_hash_entry {
    // Chunk number (not an LBA) as seen through the sparse device.
    u64 org_chunk; // Chunk number, not LBA.
    // Chunk number (not an LBA) allocated on the backing device.
    u64 sparse_chunk; // Chunk number, not LBA.
    // Next entry in the hash chain or in the free list; NULL at the end.
    struct sparse_hash_entry * next;
    // Previous entry in the hash chain; NULL for the first entry.
    struct sparse_hash_entry * prev;
};

/*
 * Per-target private data, stored in ti->private by the constructor and
 * released by the destructor.
 */
struct sparse_volume {
    // Backing device obtained with dm_get_device().
    struct dm_dev *dev;
    // Taken for read while looking up a remap, for write while adding one.
    struct rw_semaphore sparse_semaphore;
    struct sparse_hash_entry ** sparse_map; // Hash table of remappings
    // Preallocated, currently unused hash entries (linked through 'next').
    struct sparse_hash_entry * free_hash_list;
    // Slab cache backing hash_pool.
    kmem_cache_t * hash_slab;
    // Pool from which all hash entries are allocated and to which they are freed.
    mempool_t * hash_pool;
    // Non-zero once dm_io_get(1) has succeeded, so the destructor calls dm_io_put(1).
    u32 dm_io_flag;
    u32 chunk_size;	// Sectors.
    u32 chunk_shift; // Shift value for chunk size.
    u32 num_chunks;	// In this volume.
    u32 next_cow_entry; // Index into current COW table.
    u64 current_cow_sector;	// LOGICAL sector of current COW table.
    u32 next_free_chunk; // Index of next free chunk (not LBA!).
    u32 hash_table_size; // Size of the hash table for the remap.
    // Sector offset added to a remapped sector before the I/O is sent to dev.
    u64 start;
    // In-memory copy of the COW table sector at current_cow_sector. Entries
    // are stored little-endian; an all-ones entry marks the first unused slot.
    u64 cow_table[64]; // One sector's worth of COW tables.
};

/*************************** OLD SERVICES ****************************/

/*
 * Compute the base-2 logarithm of a power of two.
 *
 * Returns the exponent when 'value' is an exact power of two, -1 when
 * 'value' is zero, and -2 when 'value' is non-zero but not a power of two.
 */
inline int log2(u32 value)
{
    int result = 0;

    if (!value) {
	return -1;
    }

    /*
     * Shift the unsigned value itself. Copying it into a signed long first
     * makes 0x80000000 negative where long is 32 bits wide; the right shift
     * then sign-extends and the value is wrongly rejected.
     */
    while (!(value & 1)) {
	result++;
	value >>= 1;
    }

    /* Anything left besides the single low bit means more than one bit was set. */
    if (value != 1) {
	return -2;
    }
    return result;
}

/********************************* Functions *********************************/

/***************************** Hash Functions *****************************/

/*
 * Pop one entry off the volume's free hash list and initialise it as a
 * detached node describing the org_chunk -> sparse_chunk remapping.
 * Returns NULL when the free list is empty. The caller is expected to hold
 * whatever lock protects the volume's lists.
 */
static struct sparse_hash_entry * 
allocate_sparse_hash_entry( struct sparse_volume * volume,	
			    u64 org_chunk,
			    u64 sparse_chunk )
{
    struct sparse_hash_entry * entry = volume->free_hash_list;

    /* Entries are preallocated, so an empty list should not normally occur. */
    if ( !entry ) {
	return NULL;
    }

    /* Unlink from the free list before touching the link fields. */
    volume->free_hash_list = entry->next;

    entry->next = NULL;
    entry->prev = NULL;
    entry->org_chunk = org_chunk;
    entry->sparse_chunk = sparse_chunk;

    return entry;
}

/*
 *	Link 'entry' into a hash chain directly behind 'base'. 'base' must be
 *	an entry that is already on the chain; to place an entry in front of
 *	the first element, or into an empty chain, use
 *	insert_sparse_hash_entry_at_head() instead. Always returns 0.
 */
static int insert_sparse_hash_entry( struct sparse_hash_entry * entry,
				     struct sparse_hash_entry * base )
{
	struct sparse_hash_entry * follower = base->next;

	entry->prev = base;
	entry->next = follower;
	if ( follower ) {
		follower->prev = entry;
	}
	base->next = entry;

	return 0;
}

/*
 *	Make 'entry' the first element of the chain anchored at '*head'. The
 *	previous first element, if any, becomes its successor. Always
 *	returns 0.
 */
static int insert_sparse_hash_entry_at_head( struct sparse_hash_entry * entry,
					     struct sparse_hash_entry ** head )
{
	struct sparse_hash_entry * old_first = *head;

	if ( old_first ) {
		old_first->prev = entry;
	}
	entry->prev = NULL;
	entry->next = old_first;
	*head = entry;

	return 0;
}

/*
 *	Return every entry of the chain starting at 'head' to the volume's
 *	hash pool. The anchor pointer that referenced the chain is not
 *	modified. Always returns 0.
 */
static int delete_sparse_hash_chain( struct sparse_volume * vol, 
				     struct sparse_hash_entry * head )
{
    while ( head ) {
	struct sparse_hash_entry * doomed = head;

	/* Step forward first: the entry must not be read after it is freed. */
	head = head->next;
	mempool_free( doomed, vol->hash_pool );
    }
    return 0;
}

/*
 *	Look for 'chunk' in the chain anchored at 'head'. The chain is
 *	expected to be sorted by ascending org_chunk.
 *
 *	Returns 1 and stores the matching entry in '*result' when the chunk
 *	is present. Otherwise returns 0 and stores the entry that a new node
 *	for 'chunk' would have to follow; that is NULL when the chain is
 *	empty or when the new node belongs in front of the first entry.
 */
static int search_sparse_hash_chain( u64 chunk,
				     struct sparse_hash_entry * head,
				     struct sparse_hash_entry ** result )
{
    struct sparse_hash_entry * last_seen = head;
    struct sparse_hash_entry * walk;

    /* Skip every entry that sorts strictly before the wanted chunk. */
    for ( walk = head; walk; walk = walk->next ) {
	if ( walk->org_chunk >= chunk ) {
	    break;
	}
	last_seen = walk;
    }

    if ( walk && walk->org_chunk == chunk ) {
	*result = walk;
	return 1;
    }

    /*
     * Not found: either the walk stopped at the first larger entry (insert
     * after its predecessor) or it ran off the end of the chain (insert
     * after the last entry visited, or NULL for an empty chain).
     */
    *result = walk ? walk->prev : last_seen;
    return 0;
}

/*
 *	Take one COW table entry (already converted to CPU byte order) and
 *	record it in the volume's sparse map as org_chunk -> sparse_chunk.
 *
 *	Returns 0 on success, -ENOMEM when no free hash entry is left and
 *	-EINVAL when org_chunk is already present in the map. On failure the
 *	map and the free list are left as they were.
 */
static int add_cow_entry_to_sparse_map( u64 org_chunk,
					u64 sparse_chunk,
					struct sparse_volume * volume )
{
    struct sparse_hash_entry * new_entry;
    struct sparse_hash_entry * target_entry;
    u32 hash_value;

    new_entry = allocate_sparse_hash_entry(volume, org_chunk, sparse_chunk);
    if (!new_entry) {
	return -ENOMEM;
    }

    /*
     * Hash exactly as the lookup paths do (truncate to 32 bits, then take
     * the modulus). Going through a signed long instead selects a different
     * bucket for large chunk numbers where long is 64 bits wide, and can
     * even produce a negative remainder.
     */
    hash_value = ((u32)org_chunk) % volume->hash_table_size;

    if ( search_sparse_hash_chain( org_chunk, 
				   volume->sparse_map[hash_value], 
				   &target_entry ) ) {
	/*
	 * Duplicate chunk in the COW table. Hand the unused entry back to
	 * the free list so that it is still released at teardown.
	 */
	new_entry->next = volume->free_hash_list;
	volume->free_hash_list = new_entry;
	return -EINVAL;
    }

    if ( target_entry ) {
	insert_sparse_hash_entry( new_entry, target_entry );
    }
    else {
	insert_sparse_hash_entry_at_head
	    ( new_entry, &(volume->sparse_map[hash_value]) );
    }
    return 0;
}

/*
 *	Construct the initial hash table state based on 
 *	existing COW tables on the disk.
 */
static int build_sparse_maps(struct sparse_volume * volume)
{
    int rc = 0, done = 0;
    struct io_region job;
    struct page * page;
    unsigned int error, offset;
  
    while (!done) {
	
	// Read in one sector's worth of COW tables.
        job.dev = volume->dev->dev;
        job.sector = volume->current_cow_sector;
        job.count = 1;
        page = virt_to_page(volume->cow_table);
        offset = (unsigned long)volume->cow_table & ~PAGE_MASK;
        rc = dm_io_sync(1, &job, READ, page, offset, &error);
        if (rc) {
            return rc;
	}

	// Translate every valid COW table entry into
	// a sparse map entry.
	for ( volume->next_cow_entry = 0;

	      volume->next_cow_entry < (SECTOR_SIZE/sizeof(u64)) &&
		  volume->cow_table[volume->next_cow_entry] != 
		  0xffffffffffffffff;

	      volume->next_cow_entry++, volume->next_free_chunk++ ) {

	    if ( (rc = add_cow_entry_to_sparse_map
		  ( le64_to_cpu( volume->cow_table[volume->next_cow_entry] ),
		    volume->next_free_chunk, volume ))) {
		return( rc );
	    }
	}
	// Move on to the next sector if necessary.
	if ( volume->next_cow_entry == (SECTOR_SIZE/sizeof(u64)) ) {
	    volume->current_cow_sector++;
	}
	else {
	    done = 1;
	}
    }
    return 0;
}

/************************* Other Functions ************************/

/*
 * Function: sparse_remap_chunk
 *
 *	Translate '*sector' through the volume's sparse map. The sector is
 *	split into its chunk number and its offset inside the chunk, and the
 *	chunk is looked up under the read side of the volume semaphore.
 *
 *	Returns 0 and rewrites '*sector' to the same offset inside the
 *	remapped chunk when a mapping exists. Returns 1 and leaves '*sector'
 *	untouched when the chunk has not been remapped.
 */
static int sparse_remap_chunk( struct sparse_volume * sparse_volume,
			       u64 * sector )
{
    struct sparse_hash_entry * hit;
    u64 chunk;
    u32 bucket;
    u32 offset_in_chunk;
    int found;
    
    down_read(&sparse_volume->sparse_semaphore);
    
    chunk = *sector >> sparse_volume->chunk_shift;
    offset_in_chunk = *sector & (u64)(sparse_volume->chunk_size - 1);
    bucket = ((u32)chunk) % sparse_volume->hash_table_size;
    
    found = search_sparse_hash_chain( chunk,
				      sparse_volume->sparse_map[bucket],
				      &hit );
    if ( found ) {
	*sector = ( hit->sparse_chunk << sparse_volume->chunk_shift )
	    + offset_in_chunk;
    }

    up_read(&sparse_volume->sparse_semaphore);

    return found ? 0 : 1;
}

/* Function: sparse_cow_write
 *
 *	Make sure the chunk containing '*sector' is remapped, creating the
 *	remap if necessary, and rewrite '*sector' to point into the remapped
 *	chunk. The whole operation runs under the write side of the volume
 *	semaphore.
 *
 *	A new remap is first recorded in the in-memory COW table and written
 *	synchronously to disk; only when that write has succeeded is the
 *	remap published in the hash table and the chunk counted as used. If
 *	anything fails, the COW table slot and the hash entry are rolled back
 *	so that memory and disk do not disagree.
 *
 *	Returns 0 on success, -ENOSPC when no free chunk is left, -ENOMEM
 *	when no hash entry or I/O buffer is available, or the error returned
 *	by dm_io_sync() when the COW table could not be written.
 */

static int sparse_cow_write( struct sparse_volume * sparse_volume,
			     u64 * sector )
{
    struct sparse_hash_entry * target_entry, * new_map_entry;
    struct io_region job;
    struct page * page;
    char * cow = NULL;
    unsigned int error, offset;
    u64 chunk;
    u64 saved_cow_slot;
    u32 hash_value = 0;
    u32 remainder;
    int rc;
    
    down_write(&sparse_volume->sparse_semaphore);
    
    remainder = *sector & (u64)(sparse_volume->chunk_size - 1);
    chunk = *sector >> sparse_volume->chunk_shift;
    hash_value = ((u32)chunk) % sparse_volume->hash_table_size;
    
    // Another writer may have remapped the chunk before we got the lock.
    if ( search_sparse_hash_chain( chunk, 
				   sparse_volume->sparse_map[hash_value], 
				   &target_entry) ) {
	*sector = 
	    ( target_entry->sparse_chunk << sparse_volume->chunk_shift ) 
	    + remainder;
	rc = 0;
	goto out;
    }
    
    // Is there enough room left on this sparse to remap this chunk?
    if ( sparse_volume->next_free_chunk >= sparse_volume->num_chunks ) {
	DMERR("dm-sparse: full no new remaps allowed\n");
	rc = -ENOSPC;
	goto out;
    }
    
    // Create and initialize a new hash table entry for the new remap.
    new_map_entry = allocate_sparse_hash_entry
	(sparse_volume, chunk, sparse_volume->next_free_chunk);
    if ( ! new_map_entry ) {
	// Can't get memory for map entry.
	DMERR("dm-sparse: memory error allocating hash entry\n");
	rc = -ENOMEM;
	goto out;
    }
    
    // Bounce buffer for the COW table write. This runs in the I/O path
    // with the volume semaphore held, so the allocation must not start
    // new I/O itself (GFP_NOIO rather than GFP_KERNEL).
    cow = kmalloc( SECTOR_SIZE, GFP_NOIO );
    if (! cow ) {
	// Can't get I/O buffer.
	DMERR("dm-sparse: memory error allocating COW table buffer");
	rc = -ENOMEM;
	goto out_release;
    }

    // Update the appropriate entry in the COW table, remembering the old
    // contents of the slot in case the write has to be undone.
    saved_cow_slot = sparse_volume->cow_table[sparse_volume->next_cow_entry];
    sparse_volume->cow_table[sparse_volume->next_cow_entry] = 
	cpu_to_le64(chunk);
    
    memcpy(cow, sparse_volume->cow_table, SECTOR_SIZE);

    //because of ordering issues needs to be synchronous
    job.dev = sparse_volume->dev->dev;
    job.sector = sparse_volume->current_cow_sector;
    job.count = 1;
    page = virt_to_page(cow);
    offset = (unsigned long)cow & ~PAGE_MASK;
    rc = dm_io_sync(1, &job, WRITE, page, offset, &error);
    if ( rc ) {
	// The remap never reached the disk: do not publish it.
	DMERR("dm-sparse: error writing COW table");
	sparse_volume->cow_table[sparse_volume->next_cow_entry] =
	    saved_cow_slot;
	goto out_release;
    }

    // The remap is on disk; add the entry to the hash table.
    if ( target_entry ) {	
	insert_sparse_hash_entry( new_map_entry, target_entry );
    }
    else {
	insert_sparse_hash_entry_at_head
	    ( new_map_entry, 
	      &(sparse_volume->sparse_map[hash_value]) );
    }
    
    sparse_volume->next_free_chunk++;
    sparse_volume->next_cow_entry++;
    
    // Current COW sector is full: continue in the next one, starting
    // from a table that holds only end markers.
    if ( sparse_volume->next_cow_entry >= (SECTOR_SIZE/sizeof(u64)) )
	{
	    sparse_volume->next_cow_entry = 0;
	    sparse_volume->current_cow_sector++;
	    memset(sparse_volume->cow_table, 0xff, SECTOR_SIZE);
	}
    
    *sector = ( new_map_entry->sparse_chunk << sparse_volume->chunk_shift )
	+ remainder;
    
    rc = 0;
    goto out;

 out_release:
    // Give the unused hash entry back so it is not lost to the free list.
    new_map_entry->next = sparse_volume->free_hash_list;
    sparse_volume->free_hash_list = new_map_entry;
    
 out:
    up_write(&sparse_volume->sparse_semaphore);
    if ( cow ) {
	kfree( cow );
    }

    return rc;
}

/************************ EXPORT FUNCTIONS ************************/

/*
 * Function: sparse_dtr
 *
 *	Release everything hanging off ti->private: the hash chains and the
 *	free list, the hash table itself, the entry pool and its slab cache,
 *	the reference on the backing device, the dm-io reservation (when one
 *	was taken) and finally the volume structure. Does nothing when
 *	ti->private is NULL.
 */
static void sparse_dtr( struct dm_target *ti )
{
    struct sparse_volume * vol = (struct sparse_volume *)ti->private;
    int bucket;

    if ( !vol ) {
	return;
    }

    if ( vol->sparse_map ) {
	// Every entry lives either on a hash chain or on the free list.
	for ( bucket = 0; bucket < vol->hash_table_size; bucket++ ) {
	    delete_sparse_hash_chain( vol, vol->sparse_map[bucket] );
	}
	delete_sparse_hash_chain( vol, vol->free_hash_list );
	vfree( vol->sparse_map );
    }

    // The pool is destroyed before the slab cache that backs it.
    if ( vol->hash_pool ) {
	mempool_destroy( vol->hash_pool );
    }
    if ( vol->hash_slab ) {
	kmem_cache_destroy( vol->hash_slab );
    }

    dm_put_device( ti, vol->dev );

    if ( vol->dm_io_flag ) {
	dm_io_put(1);
    }

    kfree( vol );
}

/*
 * Function: sparse_ctr
 */
static int sparse_ctr( struct dm_target *ti, unsigned int argc, char** argv )
{
    int i, rc = -EINVAL;
    struct sparse_hash_entry *new_entry;
    struct sparse_volume *vol;
    struct dm_dev *dev;
    u32 chunk_size, chunks;
    u64 start;
    char* end, slab_name[NAME_SIZE+1];

    if ( argc != 4 ) {
	ti->error="dm-sparse: wrong number of arguments";
	return rc;
    }

    start = simple_strtoull(argv[1], &end, 10);
    if (*end) {
	ti->error="dm-sparse: Invalid first chunk lba";
	return rc;
    }

    chunk_size = simple_strtoul(argv[2], &end, 10);    
    if (*end) {
	ti->error="dm-sparse: Invalid chunk_size";
	return rc;
    }

    chunks = simple_strtoul(argv[3], &end, 10);
    if (*end) {
	ti->error="dm-sparse: Invalid number of chunks";
	return rc;
    }

    if ( dm_get_device( ti, argv[0], ti->begin, start + chunks * chunk_size,
			dm_table_get_mode(ti->table), &dev ) ) {
	ti->error = "dm-sparse: Device lookup failed";
	return rc;
    }

    vol = kmalloc(sizeof(struct sparse_volume), GFP_KERNEL);
    if ( !vol ) {
	ti->error = "dm-sparse: Memory allocation for private-data failed";
        rc = -ENOMEM;
	goto out;
    }

    memset( vol, 0, sizeof(struct sparse_volume) );

    rc = dm_io_get(1);
    if (rc) {
	    ti->error = "dm-sparse: failed to initialize dm-io.";
	    sparse_dtr(ti);
	    return rc;
    }
    
    // Initialize
    vol->dm_io_flag = 1;
    vol->chunk_size = chunk_size;
    vol->chunk_shift = log2(chunk_size);
    vol->num_chunks = chunks;
    vol->current_cow_sector = 1;
    vol->hash_table_size = chunks / MAX_HASH_CHAIN_ENTRIES + 1;
    vol->start = start;
    vol->dev = dev;
    init_rwsem(&vol->sparse_semaphore);

    snprintf(slab_name, NAME_SIZE, "sparse-%p", vol);
    vol->hash_slab = kmem_cache_create(slab_name,
				       sizeof(struct sparse_hash_entry),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL, NULL);
    if ( ! vol->hash_slab ) {
	ti->error = "dm-sparse: memory allocation error in hash slab create";
	sparse_dtr(ti);
	return -ENOMEM;
    }
    vol->hash_pool = mempool_create(1, mempool_alloc_slab,
				    mempool_free_slab,
				    vol->hash_slab);    
    if ( ! vol->hash_pool ) {
	ti->error = "dm-sparse: memory allocation error in hash pool create";
	sparse_dtr(ti);
	return -ENOMEM;
    }

    // Sparse hash table
    vol->sparse_map = vmalloc( vol->hash_table_size * 
			       sizeof( struct sparse_hash_entry * ) );
    if ( ! vol->sparse_map ) {
	ti->error = "dm-sparse: Memory allocation error in sparse_map create";
	sparse_dtr(ti);
	return -ENOMEM;
    }

    memset( vol->sparse_map, 0, vol->hash_table_size * 
	    sizeof( struct sparse_hash_entry * ) );
    
    for ( i = 0; i < chunks; i++ ) {

	new_entry = mempool_alloc(vol->hash_pool, GFP_KERNEL );
	if ( ! new_entry ) {
	    ti->error="dm-sparse: memory allocation error in hash table setup";
	    sparse_dtr(ti);
	    return -ENOMEM;
	}

	new_entry->next = vol->free_hash_list;
	vol->free_hash_list = new_entry;
    }
    
    rc = build_sparse_maps(vol);
    if (rc) {
	ti->error = "dm-sparse: error building hash tables";
	sparse_dtr(ti);
	return rc;
    }

    ti->private = vol;
    return rc;

 out:
    dm_put_device(ti, dev);
    return rc;
}

/*
 * Function: sparse_map
 */
static int sparse_map( struct dm_target * ti, struct buffer_head * bh, int rw,
		       union map_info *map_context )
{
    struct sparse_volume * volume = (struct sparse_volume*)ti->private;
    u64 sector = bh->b_rsector;
    int rc;



    // Check if this sector has been remapped
    rc = sparse_remap_chunk( volume, &sector );
    
    if ( rc < 0 ) { //Error
	bh->b_end_io(bh, 0);
	return rc;
    }
    
    if ( rc == 0 ) { // Remapped I/O : read or write same logic
	bh->b_rsector = volume->start + sector;
	bh->b_rdev = volume->dev->dev;
	return 1;
    }
    
    // ( Previously )Un-mapped:	read / write different logic
    
    if ( rw ) { //write :
	rc = sparse_cow_write( volume, &sector );
	
	if ( rc < 0 ) { //Error
	    bh->b_end_io(bh, 0);
	    return rc;
	}		    
	//Send write on
	bh->b_rsector = volume->start + sector;
	bh->b_rdev = volume->dev->dev;
	return 1;
    }
    
    //Reading something that was never written 
    //return zeros and indicate complete
    memset(bh->b_data, 0x0, bh->b_size);
    bh->b_end_io(bh, 1);
    return 0;
}

/*
 * Function: sparse_status
 *
 *	STATUSTYPE_INFO reports how full the volume is, as the percentage of
 *	chunks already handed out. STATUSTYPE_TABLE reports the table line
 *	arguments: device, start, chunk size and number of chunks. Other
 *	status types leave 'result' untouched. Always returns 0.
 */
static int sparse_status( struct dm_target *ti, status_type_t type, 
			  char *result, unsigned int maxlen )
{
    struct sparse_volume * vol = (struct sparse_volume * )ti->private;
    u32 used, total, per_percent, percent;
    
    switch(type) {

    case STATUSTYPE_INFO:
	used = vol->next_free_chunk;
	total = vol->num_chunks;

	if ( !total ) {
	    // Nothing to divide by; an empty volume is reported as 0%.
	    percent = 0;
	}
	else if ( used <= 0xffffffffU / 100 ) {
	    percent = ( used * 100 ) / total;
	}
	else {
	    // used * 100 would overflow 32 bits. Divide by the number of
	    // chunks per percent instead; with this many chunks the
	    // rounding error is negligible.
	    per_percent = total / 100;
	    percent = per_percent ? used / per_percent : 100;
	}
	snprintf( result, maxlen, "%u%%", percent );
	break;
	
    case STATUSTYPE_TABLE:
	snprintf( result, maxlen, "%s %Lu %u %u", 
		  dm_kdevname(vol->dev->dev),
		  (unsigned long long)vol->start, 
		  vol->chunk_size, vol->num_chunks ); 
	break;

    default:
	break;
    }
    
    return 0;
}

/****************** FUNCTION TABLE **********************/

/*
 * Target type descriptor passed to dm_register_target() and
 * dm_unregister_target(); it names the target "sparse" and wires up the
 * constructor, destructor, map and status callbacks defined in this file.
 */
static struct target_type sparse_target = {
    .name = "sparse",
    .module = THIS_MODULE,
    .ctr = sparse_ctr,
    .dtr = sparse_dtr,
    .map = sparse_map,
    .status = sparse_status,
};

/********************* REGISTRATION *****************/

/*
 * Module entry point: register the sparse target with device-mapper.
 * Returns the result of dm_register_target(); a negative result is also
 * logged as a warning.
 */
int __init sparse_init(void)
{
    int rc;

    rc = dm_register_target(&sparse_target);
    if ( rc >= 0 ) {
	return rc;
    }

    DMWARN("sparse target registration failed");
    return rc;
}

/*
 * Module exit point: unregister the sparse target. A non-zero result from
 * dm_unregister_target() can only be reported as a warning.
 */
void __exit sparse_exit(void)
{
    int rc = dm_unregister_target(&sparse_target);

    if ( rc ) {
	DMWARN("sparse target unregistration failed");
    }
}

/* Hook the entry and exit points into the module loader and declare the license. */
module_init(sparse_init);
module_exit(sparse_exit);
MODULE_LICENSE("GPL");
