/*
 * RAID-0 driver code for the raidreconf utility
 * (C) 1999,2000 by Jakob Oestergaard
 *
 * This source is covered by the GNU GPL, the same as all Linux kernel
 * sources.
 */

#include "raidreconf.h"
#include "rrc_common.h"

/*
 * One RAID-0 zone: a range of chunks that is striped over one fixed set of
 * disks.  The zone table is built by calc_raid_zones().
 */
typedef struct rrc_zone_t {
	unsigned long tchunks;	/* total size of zone, in chunks, summed over all disks of the zone */
	unsigned long ndisks;	/* number of valid entries in disks[] and disk_nrs[] */
	rrc_disk_t **disks;	/* the disks of this zone; entries point into the array given to calc_raid_zones() */
	unsigned long *disk_nrs;	/* mapping of zone disk # into real (zone[0]) disk #, i.e. the index into that disk array */
} rrc_zone_t;

/*
 * Private state of one RAID-0 level driver.  Allocated and zeroed by
 * new_raid0_driver(), filled in by raid0_initialize().
 */
typedef struct raid0_driver_priv {
	rrc_zone_t *zones;	/* zone table produced by calc_raid_zones() */
	rrc_disk_t *disks;	/* the disk array passed to raid0_initialize() (pointer is stored, not copied) */
	int n_zones;		/* number of entries in zones[] */
	unsigned long tot_chunks;	/* sum of tchunks over all zones */
	unsigned long blocks_per_chunk;	/* (chunk_size / MD_BLK_SIZ) / reconf_block_size, see raid0_initialize() */
	/* state for the current non-source sink-disk we're wishing for blocks to (pass 1) */
	int cur_disk;		/* initialized to -1; afterwards the index of the disk pass 1 is working on.
				 * NOTE(review): an earlier note said this equals zones[0].ndisks once
				 * pass 2 starts, but no code in this file stores that value - confirm. */
	unsigned long *dsk_total_blocks;	/* per disk index: chunks * blocks_per_chunk */
	unsigned long *dsk_blocks_done;	/* per disk index: blocks already handled by raid0_request_blocks() */
} raid0_driver_priv;

/*
 * Forward declarations of the driver entry points; new_raid0_driver()
 * stores them in the level_driver_t it returns.
 */
static const char *raid0_initialize (void *thisp, md_cfg_entry_t * cfg,
				     rrc_disk_t * cfgdisks,
				     unsigned long *blocks);
static driver_status_t raid0_request_blocks (void *thisp);
static const char *raid0_update_super (void *thisp);
static const char *raid0_map_global_to_local (void *thisp,
					      unsigned long gblock,
					      int *diskid,
					      unsigned long *lblock);
static unsigned long raid0_map_local_to_global (void *thisp, int diskid,
						unsigned long dblock);
static void raid0_free_blocks_above_gblock (void *thisp,
					    unsigned long gblock);
static void raid0_unfree_all_blocks (void *thisp);

/*
 * Build the RAID-0 zone table for a set of disks.
 *
 *   disks, ndisks  the disks of the array; only their chunk counts are read
 *   zones          receives a malloc'ed table with room for ndisks zones
 *   nzones         receives the number of zones actually filled in
 *
 * Each zone covers the chunks between the previous zone boundary and the
 * size of the smallest disk that is larger than that boundary, striped over
 * every disk that is at least that large.
 *
 * Returns 0 on success and 1 if memory could not be allocated.  On failure
 * everything allocated here is released again, *zones is 0 and *nzones is 0.
 */
static int
calc_raid_zones (rrc_disk_t * disks, int ndisks, rrc_zone_t ** zones,
		 int *nzones)
{
	int i;
	/* Chunks per disk that are already covered by earlier zones.  This
	 * is also the boundary a disk has to exceed to start a new zone. */
	unsigned long passed_dchunks = 0;

	assert (ndisks);
	*nzones = 0;
	*zones = malloc (sizeof (**zones) * ndisks);
	if (!*zones) {
		fprintf (stderr, "Cannot alloc. zone table\n");
		return 1;
	}
	/* Step thru disks: there can never be more zones than disks */
	for (i = 0; i != ndisks; i++) {
		int d;
		rrc_disk_t *smallest = 0;
		rrc_zone_t *zone;

		/* Find the smallest disk that reaches beyond the previous
		 * zone.  The comparison is strictly "larger than the
		 * boundary": a disk that is exactly one chunk larger must
		 * still get a zone for that chunk, otherwise the chunk
		 * belongs to no zone although dsk_total_blocks counts it. */
		for (d = 0; d != ndisks; d++) {
			if (disks[d].chunks <= passed_dchunks)
				continue;
			if (!smallest || disks[d].chunks < smallest->chunks)
				smallest = &disks[d];
		}
		if (smallest)
			fprintf (stderr, "Smallest is: %lu chunks\n",
				 smallest->chunks);
		else
			fprintf (stderr, "No smallest\n");

		/* No disk extends past the boundary: all zones are found */
		if (!smallest)
			break;

		zone = &(*zones)[*nzones];
		zone->ndisks = 0;
		zone->tchunks = 0;
		zone->disks = malloc (sizeof (*zone->disks) * ndisks);
		zone->disk_nrs = malloc (sizeof (*zone->disk_nrs) * ndisks);
		/* Either allocation failing is fatal (both arrays are
		 * written below). */
		if (!zone->disks || !zone->disk_nrs) {
			fprintf (stderr,
				 "zone->disk or zone->disk_nr array allocation failure\n");
			free (zone->disks);
			free (zone->disk_nrs);
			while (*nzones > 0) {
				(*nzones)--;
				free ((*zones)[*nzones].disks);
				free ((*zones)[*nzones].disk_nrs);
			}
			free (*zones);
			*zones = 0;
			return 1;
		}

		/* Now find all disks of this size or above */
		for (d = 0; d != ndisks; d++) {
			if (disks[d].chunks < smallest->chunks)
				continue;
			fprintf (stderr,
				 "Adding disk %i to zone %i as disk # %lu\n",
				 d, *nzones, zone->ndisks);
			zone->disks[zone->ndisks] = &disks[d];
			zone->disk_nrs[zone->ndisks] = d;
			zone->ndisks++;
			/* every member contributes the chunks between the
			 * old and the new boundary */
			zone->tchunks += smallest->chunks - passed_dchunks;
		}
		fprintf (stderr, "Size of zone %i is %lu chunks.\n",
			 *nzones, zone->tchunks);
		(*nzones)++;
		passed_dchunks = smallest->chunks;
	}
	return 0;
}

/* returns the global block number from a sink disk id and disk-block */
static unsigned long
raid0_map_local_to_global (void *thisp, int diskid, unsigned long dblock)
{
	raid0_driver_priv *this = (raid0_driver_priv *) thisp;

	unsigned long passed_gblocks = 0;
	unsigned long passed_dblocks = 0;
	unsigned long czone = 0;

	while (czone < this->n_zones) {
		/* Is dchunk in this zone ? */
		if (this->zones[czone].tchunks * this->blocks_per_chunk /
		    this->zones[czone].ndisks > dblock - passed_dblocks) {
			unsigned long stripe_index;

			/* Find the stripe index that yield disk as the disk number */
			for (stripe_index = 0;
			     stripe_index != this->zones[czone].ndisks;
			     stripe_index++) {
				if (this->
				    disks[this->zones[czone].
					  disk_nrs[
						   (passed_gblocks /
						    this->
						    blocks_per_chunk +
						    stripe_index) %
						   this->zones[czone].
						   ndisks]].disk_id ==
				    diskid) break;
			}
			if (stripe_index == this->zones[czone].ndisks) {
				fprintf (stderr,
					 "Bummer! No stripe index yields disk id %i as destination\nWill dump core.\n",
					 diskid);
				abort ();
			}
			/* global block = passed gblocks blocks
			 *  + zone disks * ((disk block - passed disk blocks) / blocks per chunk * blocks per chunk)
			 *  + disk index * blocks per chunk 
			 *  + (disk block - passed disk blocks) % blocks per chunk
			 */
			assert (dblock / this->blocks_per_chunk -
				passed_dblocks / this->blocks_per_chunk ==
				(dblock -
				 passed_dblocks) / this->blocks_per_chunk);
			return passed_gblocks + (dblock -
						 passed_dblocks) /
			    this->blocks_per_chunk *
			    this->blocks_per_chunk *
			    this->zones[czone].ndisks +
			    stripe_index * this->blocks_per_chunk +
			    (dblock -
			     passed_dblocks) % this->blocks_per_chunk;
		}
		else {
			/* Nope */
			/*      fprintf(stderr, "Chunk %lu is not in zone %lu\n", dchunk, czone); */
			passed_dblocks +=
			    this->zones[czone].tchunks /
			    this->zones[czone].ndisks *
			    this->blocks_per_chunk;
			passed_gblocks +=
			    this->zones[czone].tchunks *
			    this->blocks_per_chunk;
			czone++;
		}
	}
	/* BUG! */
	fprintf (stderr,
		 "Ouch ! block %lu on disk id %i is not in array!\nWill dump core.\n",
		 dblock, diskid);
	abort ();
}


/*
 * Allocate a RAID-0 level driver together with its private state.
 *
 * The entry points are set to the raid0_* functions of this file and the
 * private state is reset (cur_disk is -1, everything else is 0); the driver
 * still has to be set up through its initialize entry point.
 *
 * Returns the new driver, or 0 if memory could not be allocated.
 */
level_driver_t *
new_raid0_driver (void)
{
	level_driver_t *drv = malloc (sizeof (*drv));
	raid0_driver_priv *priv = malloc (sizeof (*priv));

	if (!drv || !priv) {
		/* One of the two may have been allocated; don't leak it.
		 * free(0) is a no-op. */
		free (drv);
		free (priv);
		return 0;
	}

	drv->initialize = raid0_initialize;
	drv->request_blocks = raid0_request_blocks;
	drv->update_super = raid0_update_super;
	drv->map_global_to_local = raid0_map_global_to_local;
	drv->map_local_to_global = raid0_map_local_to_global;
	drv->free_blocks_above_gblock = raid0_free_blocks_above_gblock;
	drv->unfree_all_blocks = raid0_unfree_all_blocks;

	drv->priv = priv;
	priv->zones = 0;
	priv->disks = 0;
	priv->n_zones = 0;
	priv->tot_chunks = 0;
	priv->blocks_per_chunk = 0;
	priv->cur_disk = -1;
	priv->dsk_total_blocks = 0;
	priv->dsk_blocks_done = 0;

	return drv;
}


/*
 * Set up the private driver state for one array configuration.
 *
 *   thisp     the raid0_driver_priv of the driver
 *   cfg       array configuration; nr_disks and chunk_size are read
 *   cfgdisks  the disks of the array (the pointer is kept in the driver)
 *   blocks    receives the array size in blocks
 *             (blocks_per_chunk * total chunks)
 *
 * Returns 0 on success, otherwise a static error string.
 */
static const char *
raid0_initialize (void *thisp, md_cfg_entry_t * cfg, rrc_disk_t * cfgdisks,
		  unsigned long *blocks)
{
	raid0_driver_priv *this = (raid0_driver_priv *) thisp;
	unsigned long d;

	/* Calculate zones */
	if (calc_raid_zones
	    (cfgdisks, cfg->array.param.nr_disks, &this->zones,
	     &this->n_zones)) {
		return "Zone calculation failed!";
	}
	this->disks = cfgdisks;

	this->tot_chunks = 0;
	for (d = 0; d != this->n_zones; d++)
		this->tot_chunks += this->zones[d].tchunks;

	this->blocks_per_chunk =
	    (cfg->array.param.chunk_size / MD_BLK_SIZ) / reconf_block_size;
	/* Every mapping routine divides by blocks_per_chunk, so a chunk
	 * that is smaller than one block cannot be handled. */
	if (!this->blocks_per_chunk)
		return "RAID-0 chunk size is smaller than the reconf block size";

	*blocks = this->blocks_per_chunk * this->tot_chunks;

	/* Per-disk size in blocks */
	this->dsk_total_blocks =
	    malloc (sizeof (*this->dsk_total_blocks) *
		    cfg->array.param.nr_disks);
	if (!this->dsk_total_blocks)
		return "Cannot allocate disk blocks array";
	for (d = 0; d != cfg->array.param.nr_disks; d++)
		this->dsk_total_blocks[d] =
		    cfgdisks[d].chunks * this->blocks_per_chunk;

	/* Per-disk progress counters, all starting at zero */
	this->dsk_blocks_done =
	    malloc (sizeof (*this->dsk_blocks_done) *
		    cfg->array.param.nr_disks);
	if (!this->dsk_blocks_done) {
		/* don't leave the first array behind on failure */
		free (this->dsk_total_blocks);
		this->dsk_total_blocks = 0;
		return "Cannot allocate disk blocks done array";
	}
	for (d = 0; d != cfg->array.param.nr_disks; d++)
		this->dsk_blocks_done[d] = 0;

	fprintf (stderr,
		 "RAID0 Array size: %lu blocks (%lu Chunks) on %i disks\n",
		 this->blocks_per_chunk * this->tot_chunks,
		 this->tot_chunks, cfg->array.param.nr_disks);
	return 0;
}


/*
 * This routine will fill in the wish_list with wishes for blocks it would
 * like to write.
 *
 * It walks the disks with index 0 .. zones[0].ndisks-1 block by block,
 * advancing dsk_blocks_done[] as it goes.  Each disk block is translated to
 * a global block through sink_driver->map_local_to_global() and handed to
 * insert_wish() when is_gblock_in_source() accepts it.
 *
 * Returns
 *   LDR_INCOMPLETE  when can_wish_again() turned false before all disks
 *                   were finished,
 *   LDR_DONE        when a pass-2 round found nothing left on any disk,
 *   LDR_FAILED      when a pass-2 round found nothing to do although a
 *                   disk's done counter differs from its total.
 *
 * NOTE(review): can_wish_again(), insert_wish(), is_diskid_in_source() and
 * is_gblock_in_source() are defined outside this file; presumably
 * can_wish_again() tells whether the wish list has room left - confirm.
 */
static driver_status_t
raid0_request_blocks (void *thisp)
{
	raid0_driver_priv *this = (raid0_driver_priv *) thisp;

	/* Pass 1:
	 *  Find disk not in {source}
	 *    Request blocks from disk until no more wishes or disk is full
	 *
	 *  If no more wishes, return LDR_INCOMPLETE
	 *
	 * Pass 2:
	 *  While more wishes && still blocks left to write
	 *     For each disk in {sink}
	 *        Take the next block of the disk and wish for it (the code
	 *        below advances every disk by one block per round)
	 *
	 *  If no more wishes, return LDR_INCOMPLETE
	 *  If a disk still has blocks left although nothing could be done, return LDR_FAILED
	 *  Otherwise return LDR_DONE
	 */

	/* Pass 1: */
	assert (this->zones);
	while (can_wish_again ()
	       && this->cur_disk != this->zones[0].ndisks) {
		int dsk = this->cur_disk;

		/* if we have no disk, or if we just finished the one we had... */
		if (this->cur_disk == -1
		    || this->dsk_total_blocks[dsk] ==
		    this->dsk_blocks_done[dsk]) {
			/* pick next disk, don't exceed number of disks, keep picking next until we fail or reach a good one */
			for (dsk++; dsk != this->zones[0].ndisks; dsk++) {
				if (!is_diskid_in_source
				    (this->disks[dsk].disk_id)) break;
			}
		}
		/* if there are no more non-source disks to read from, skip to pass two */
		if (dsk == this->zones[0].ndisks)
			break;
		/* dsk is not in source; remember it as the disk pass 1 works on */
		if (dsk != this->cur_disk) {
			this->cur_disk = dsk;
		}
		/* wish from this disk */
		while (can_wish_again ()
		       && this->dsk_total_blocks[this->cur_disk] !=
		       this->dsk_blocks_done[this->cur_disk]) {

			/* global block that lives at the next unhandled block of this disk */
			unsigned long gblock =
			    sink_driver->map_local_to_global (sink_driver->
							      priv,
							      this->
							      disks[this->
								    cur_disk].
							      disk_id,
							      this->
							      dsk_blocks_done

							      [this->
							       cur_disk]);

			assert (this->dsk_total_blocks[this->cur_disk] >
				this->dsk_blocks_done[this->cur_disk]);
			/*
			 * Only blocks accepted by is_gblock_in_source() are wished for.
			 * NOTE(review): earlier comments at this spot argued for wishing
			 * unconditionally ("ask anyway") and suspected a discarded read
			 * in the common layer; the code does not do that - confirm which
			 * behavior is intended.
			 */
			if (is_gblock_in_source (gblock)) {
#ifdef EXTRA_CONSISTENCY_CHECKS
				/* the reverse mapping must lead back to this disk and block */
				int tdisk;
				unsigned long tdblock;

				sink_driver->
				    map_global_to_local (sink_driver->priv,
							 gblock, &tdisk,
							 &tdblock);
				assert (tdisk ==
					this->disks[this->cur_disk].
					disk_id);
				assert (tdblock ==
					this->dsk_blocks_done[this->
							      cur_disk]);
#endif

				insert_wish (gblock);
			}
			/* the block counts as handled whether or not it was wished for */
			this->dsk_blocks_done[this->cur_disk]++;
		}
	}

	/* If we stopped because we have no more wishes for one round, leave */
	if (!can_wish_again ())
		return LDR_INCOMPLETE;

	/* Pass two:
	 * round-robin over all disks, one block per disk and round
	 */
	while (can_wish_again ()) {
		/* Pick any disk */
		int dsk;
		int wished = 0;	/* set when this round advanced at least one disk */

		for (dsk = 0;
		     can_wish_again () && dsk != this->zones[0].ndisks;
		     dsk++) {
			/* If there are blocks left, wish for them */
			if (this->dsk_blocks_done[dsk] <
			    this->dsk_total_blocks[dsk]) {

				unsigned long gblock =
				    sink_driver->
				    map_local_to_global (sink_driver->priv,
							 this->disks[dsk].
							 disk_id,
							 this->
							 dsk_blocks_done

							 [dsk]);
				if (is_gblock_in_source (gblock)) {

#ifdef EXTRA_CONSISTENCY_CHECKS
					/* the reverse mapping must lead back to this disk and block */
					int tdisk;
					unsigned long tdblock;

					sink_driver->
					    map_global_to_local
					    (sink_driver->priv, gblock,
					     &tdisk, &tdblock);
					assert (tdisk ==
						this->disks[dsk].disk_id);
					assert (tdblock ==
						this->
						dsk_blocks_done[dsk]);
#endif

					insert_wish (gblock);
				}
				wished = 1;
				this->dsk_blocks_done[dsk]++;
			}
		}
		/* Nothing was advanced although wishing was still possible:
		 * either every disk is finished or the counters are off */
		if (!wished && can_wish_again ()) {
			for (dsk = 0; dsk != this->zones[0].ndisks; dsk++)
				if (this->dsk_blocks_done[dsk] !=
				    this->dsk_total_blocks[dsk]) {
					fprintf (stderr,
						 "\nRAID-0 requester: We think we're done on disk %i, but we aren't [%lu/%lu]!\n",
						 dsk,
						 this->
						 dsk_blocks_done[dsk],
						 this->
						 dsk_total_blocks[dsk]);
					return LDR_FAILED;
				}
			/* all disks are finished */
			return LDR_DONE;
		}
	}

	return LDR_INCOMPLETE;
}

/*
 * Write the new superblocks and hand the new array to the kernel.
 *
 * Runs analyze_sb() on new_md_cfg, then on the md device sets the array
 * info, adds every disk of new_md_cfg and finally issues RUN_ARRAY.
 *
 *   thisp  unused
 *
 * Returns 0 on success, otherwise a static error string.  The md device is
 * closed again on every path.
 */
static const char *
raid0_update_super (void *thisp)
{
	unsigned long d;
	int mdfile;
	int rc;
	mdu_param_t mdpar;

	(void) thisp;

	printf ("Updating superblocks...\n");
	if (analyze_sb (&ver, mkraid, new_md_cfg, 1, 0, 0)) {
		fprintf (stderr, "Error analyzing superblock.\n");
		return "RAID-0 Superblock analysis error";
	}

	/* Tell the kernel about this */
	mdfile = open (new_md_cfg->md_name, O_RDONLY);
	if (mdfile < 0) {
		perror (new_md_cfg->md_name);
		return "RAID-0 Superblock device open error";
	}
	rc =
	    ioctl (mdfile, SET_ARRAY_INFO,
		   (unsigned long) &new_md_cfg->array.param);
	if (rc) {
		fprintf (stderr,
			 "Failed setting array info for device %s\n",
			 new_md_cfg->md_name);
		close (mdfile);
		return "RAID-0 Superblock info error";
	}
	printf ("Array is updated with kernel.\n");

	for (d = 0; d != new_md_cfg->array.param.nr_disks; d++) {
		rc =
		    ioctl (mdfile, ADD_NEW_DISK,
			   (unsigned long) (new_md_cfg->array.disks + d));
		if (rc) {
			fprintf (stderr,
				 "Failed adding disk %lu to array\n", d);
			close (mdfile);
			return "RAID-0 Superblock disk addition error";
		}
	}
	printf
	    ("Disks re-inserted in array... Hold on while starting the array...\n");

	/* The array is started through a descriptor opened read-write */
	close (mdfile);
	mdfile = open (new_md_cfg->md_name, O_RDWR);
	if (mdfile < 0) {
		perror (new_md_cfg->md_name);
		return "RAID-0 Superblock device open error";
	}

	/* Now run the array ! */
	memset (&mdpar, 0, sizeof (mdpar));
	mdpar.personality = RAID0;
	mdpar.chunk_size = new_md_cfg->array.param.chunk_size;
	rc = ioctl (mdfile, RUN_ARRAY, (unsigned long) &mdpar);
	if (rc) {
		/* report first: close() below may change errno */
		switch (errno) {
		case EBUSY:
			printf ("Array %s is already running\n",
				new_md_cfg->md_name);
			break;
		default:
			perror (new_md_cfg->md_name);
		}
		close (mdfile);
		return "RAID-0 Superblock update error";
	}

	close (mdfile);

	return 0;
}

static const char *
raid0_map_global_to_local (void *thisp, unsigned long gblock, int *diskid,
			   unsigned long *lblock)
{
	raid0_driver_priv *this = (raid0_driver_priv *) thisp;

	unsigned long czone = 0;
	unsigned long passed_gchunks = 0;
	unsigned long lblock_offset = 0;
	int disk;
	unsigned long gchunk = gblock / this->blocks_per_chunk;

	assert (diskid);
	assert (lblock);
	while (czone != this->n_zones
	       && passed_gchunks + this->zones[czone].tchunks <= gchunk) {
		passed_gchunks += this->zones[czone].tchunks;
		lblock_offset +=
		    this->zones[czone].tchunks /
		    this->zones[czone].ndisks * this->blocks_per_chunk;
		czone++;
	}
	if (czone == this->n_zones) {
		/* If the block doesn't exist here */
		return "No such block!";
	}

	/* The disk calculation is not a bug. The kernel just works in mysterious ways... */
	disk =
	    this->zones[czone].disk_nrs[gchunk %
					this->zones[czone].ndisks];
	*lblock =
	    lblock_offset + (gchunk -
			     passed_gchunks) / this->zones[czone].ndisks *
	    this->blocks_per_chunk + gblock % this->blocks_per_chunk;
	*diskid = this->disks[disk].disk_id;

	return 0;
}


static void
raid0_free_blocks_above_gblock (void *thisp, unsigned long gblock)
{
	raid0_driver_priv *this = (raid0_driver_priv *) thisp;

	/*
	 * Iterate over zones,  mark blocks as free if they're >= gblock
	 */
	int czone;
	unsigned long passed_gchunks = 0;
	unsigned long gchunk = gblock / this->blocks_per_chunk;

	/* if there's a remainder, sink will lose a partial chunk - but who cares ? */
	for (czone = 0; czone != this->n_zones; czone++) {
		/* Should we free stuff in this zone ? */
		if (this->zones[czone].tchunks > gchunk - passed_gchunks) {
			/* Find zone offset */
			unsigned long zone_offset =
			    (gchunk -

			     passed_gchunks) / this->zones[czone].ndisks;
			/* Find disk to start with */
			int disk =
			    (gchunk -

			     passed_gchunks) % this->zones[czone].ndisks;
			/* Mark blocks free in that chunk and all following */
			int freed;
			unsigned long disk_block_offset =

			    zone_offset * this->blocks_per_chunk;
			do {
				freed = 0;
				for (; disk != this->zones[czone].ndisks;
				     disk++) {
					int disknr =
					    this->zones[czone].

					    disk_nrs[disk];
					int diskid =

					    this->disks[disknr].disk_id;
					if (disk_block_offset <
					    this->dsk_total_blocks[disknr]) {
						unsigned long block;

						freed = 1;
						for (block = 0;
						     block !=
						     this->
						     blocks_per_chunk;
						     block++)
							    unchecked_mark_disk_block_free
							    (diskid,
							     block +
							     disk_block_offset);
					}
				}
				disk = 0;
				disk_block_offset +=
				    this->blocks_per_chunk;
			} while (freed);
/*        fprintf(stderr, "Initial free freed %lu blocks\n", nr_free_blocks()); */
			return;
		}
		passed_gchunks += this->zones[czone].tchunks;
	}
}


/*
 * Call mark_disk_block_unfree() for every block of every disk with index
 * 0 .. zones[0].ndisks-1.
 *
 *   thisp  the raid0_driver_priv of the driver
 */
static void
raid0_unfree_all_blocks (void *thisp)
{
	raid0_driver_priv *this = (raid0_driver_priv *) thisp;
	int idx = 0;

	while (idx != this->zones[0].ndisks) {
		unsigned long blk = 0;

		while (blk != this->dsk_total_blocks[idx]) {
			mark_disk_block_unfree (this->disks[idx].disk_id,
						blk);
			blk++;
		}
		idx++;
	}
}
