patch-2.4.13 linux/drivers/message/i2o/i2o_block.c
Next file: linux/drivers/message/i2o/i2o_config.c
Previous file: linux/drivers/message/i2o/README.ioctl
Back to the patch index
Back to the overall index
- Lines: 2044
- Date:
Mon Oct 22 08:39:56 2001
- Orig file:
v2.4.12/linux/drivers/message/i2o/i2o_block.c
- Orig date:
Wed Dec 31 16:00:00 1969
diff -u --recursive --new-file v2.4.12/linux/drivers/message/i2o/i2o_block.c linux/drivers/message/i2o/i2o_block.c
@@ -0,0 +1,2043 @@
+/*
+ * I2O Random Block Storage Class OSM
+ *
+ * (C) Copyright 1999 Red Hat Software
+ *
+ * Written by Alan Cox, Building Number Three Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This is a beta test release. Most of the good code was taken
+ * from the nbd driver by Pavel Machek, who in turn took some of it
+ * from loop.c. Isn't free software great for reusability 8)
+ *
+ * Fixes/additions:
+ * Steve Ralston:
+ * Multiple device handling error fixes,
+ * Added a queue depth.
+ * Alan Cox:
+ * FC920 has an rmw bug. Dont or in the end marker.
+ * Removed queue walk, fixed for 64bitness.
+ * Deepak Saxena:
+ * Independent queues per IOP
+ * Support for dynamic device creation/deletion
+ * Code cleanup
+ * Support for larger I/Os through merge* functions
+ * (taken from DAC960 driver)
+ * Boji T Kannanthanam:
+ * Set the I2O Block devices to be detected in increasing
+ * order of TIDs during boot.
+ * Search and set the I2O block device that we boot off from as
+ * the first device to be claimed (as /dev/i2o/hda)
+ * Properly attach/detach I2O gendisk structure from the system
+ * gendisk list. The I2O block devices now appear in
+ * /proc/partitions.
+ *
+ * To do:
+ * Serial number scanning to find duplicates for FC multipathing
+ */
+
+#include <linux/major.h>
+
+#include <linux/module.h>
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/ioctl.h>
+#include <linux/i2o.h>
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+#include <linux/slab.h>
+#include <linux/hdreg.h>
+#include <linux/spinlock.h>
+
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+#include <linux/completion.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <linux/smp_lock.h>
+#include <linux/wait.h>
+
+#define MAJOR_NR I2O_MAJOR
+
+#include <linux/blk.h>
+
+#define MAX_I2OB 16 /* disks; per-minor tables below are MAX_I2OB<<4 long */
+
+#define MAX_I2OB_DEPTH 128 /* request slots in each per-IOP queue */
+#define MAX_I2OB_RETRIES 4
+
+//#define DRIVERDEBUG
+#ifdef DRIVERDEBUG
+#define DEBUG( s ) printk( s )
+#else
+#define DEBUG( s ) /* expands to nothing unless DRIVERDEBUG is defined */
+#endif
+
+/*
+ * Events that this OSM is interested in
+ */
+#define I2OB_EVENT_MASK (I2O_EVT_IND_BSA_VOLUME_LOAD | \
+ I2O_EVT_IND_BSA_VOLUME_UNLOAD | \
+ I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ | \
+ I2O_EVT_IND_BSA_CAPACITY_CHANGE | \
+ I2O_EVT_IND_BSA_SCSI_SMART )
+
+
+/*
+ * I2O Block Error Codes - should be in a header file really...
+ */
+#define I2O_BSA_DSC_SUCCESS 0x0000
+#define I2O_BSA_DSC_MEDIA_ERROR 0x0001
+#define I2O_BSA_DSC_ACCESS_ERROR 0x0002
+#define I2O_BSA_DSC_DEVICE_FAILURE 0x0003
+#define I2O_BSA_DSC_DEVICE_NOT_READY 0x0004
+#define I2O_BSA_DSC_MEDIA_NOT_PRESENT 0x0005
+#define I2O_BSA_DSC_MEDIA_LOCKED 0x0006
+#define I2O_BSA_DSC_MEDIA_FAILURE 0x0007
+#define I2O_BSA_DSC_PROTOCOL_FAILURE 0x0008
+#define I2O_BSA_DSC_BUS_FAILURE 0x0009
+#define I2O_BSA_DSC_ACCESS_VIOLATION 0x000A
+#define I2O_BSA_DSC_WRITE_PROTECTED 0x000B
+#define I2O_BSA_DSC_DEVICE_RESET 0x000C
+#define I2O_BSA_DSC_VOLUME_CHANGED 0x000D
+#define I2O_BSA_DSC_TIMEOUT 0x000E
+
+/*
+ * Some of these can be made smaller later
+ */
+
+static int i2ob_blksizes[MAX_I2OB<<4];
+static int i2ob_hardsizes[MAX_I2OB<<4]; /* device block size, set for all 16 minors of a disk */
+static int i2ob_sizes[MAX_I2OB<<4]; /* capacity in KB (byte size >> 10) */
+static int i2ob_media_change_flag[MAX_I2OB]; /* note: MAX_I2OB entries, not MAX_I2OB<<4 */
+static u32 i2ob_max_sectors[MAX_I2OB<<4]; /* filled in by i2ob_install_device() */
+
+static int i2ob_context; /* OR'd into word 2 of every message this driver builds */
+
+/*
+ * I2O Block device descriptor
+ */
+struct i2ob_device
+{
+ struct i2o_controller *controller; /* IOP used to fetch and post message frames */
+ struct i2o_device *i2odev; /* NULL is treated as "device no longer exists" */
+ int unit; /* used to index i2ob_queues[] and i2ob_backlog[] */
+ int tid; /* target ID placed in message headers */
+ int flags;
+ int refcnt; /* open count; first open claims, last release unclaims */
+ struct request *head, *tail;
+ request_queue_t *req_queue; /* queue handed to i2ob_request() on completion */
+ int max_segments; /* segment cap applied by the merge functions */
+ int done_flag;
+ int constipated; /* set while a recovery cache flush is outstanding */
+ int depth; /* limit compared against the IOP's queue_depth */
+};
+
+/*
+ * FIXME:
+ * We should cache align these to avoid ping-ponging lines on SMP
+ * boxes under heavy I/O load...
+ */
+struct i2ob_request
+{
+ struct i2ob_request *next; /* link on the free list or on the backlog list */
+ struct request *req; /* block layer request carried by this slot */
+ int num; /* written to message word 3; replies use it to index request_queue[] */
+};
+
+/*
+ * Per IOP request queue information
+ *
+ * We have a separate request_queue_t per IOP so that a heavily
+ * loaded I2O block device on an IOP does not starve block devices
+ * across all I2O controllers.
+ *
+ */
+struct i2ob_iop_queue
+{
+ atomic_t queue_depth; /* incremented per posted request, decremented on completion */
+ struct i2ob_request request_queue[MAX_I2OB_DEPTH];
+ struct i2ob_request *i2ob_qhead; /* head of the list of free request slots */
+ request_queue_t req_queue;
+};
+static struct i2ob_iop_queue *i2ob_queues[MAX_I2O_CONTROLLERS];
+static struct i2ob_request *i2ob_backlog[MAX_I2O_CONTROLLERS];
+static struct i2ob_request *i2ob_backlog_tail[MAX_I2O_CONTROLLERS];
+
+/*
+ * Each I2O disk is one of these.
+ */
+
+static struct i2ob_device i2ob_dev[MAX_I2OB<<4];
+static int i2ob_dev_count = 0;
+static struct hd_struct i2ob[MAX_I2OB<<4];
+static struct gendisk i2ob_gendisk; /* Declared later */
+
+/*
+ * Mutex and spin lock for event handling synchronization
+ * evt_msg contains the last event.
+ */
+static DECLARE_MUTEX_LOCKED(i2ob_evt_sem);
+static DECLARE_COMPLETION(i2ob_thread_dead);
+static spinlock_t i2ob_evt_lock = SPIN_LOCK_UNLOCKED;
+static u32 evt_msg[MSG_FRAME_SIZE>>2];
+
+static struct timer_list i2ob_timer;
+static int i2ob_timer_started = 0;
+
+static void i2o_block_reply(struct i2o_handler *, struct i2o_controller *,
+ struct i2o_message *);
+static void i2ob_new_device(struct i2o_controller *, struct i2o_device *);
+static void i2ob_del_device(struct i2o_controller *, struct i2o_device *);
+static void i2ob_reboot_event(void);
+static int i2ob_install_device(struct i2o_controller *, struct i2o_device *, int);
+static void i2ob_end_request(struct request *);
+static void i2ob_request(request_queue_t *);
+static int i2ob_backlog_request(struct i2o_controller *, struct i2ob_device *);
+static int i2ob_init_iop(unsigned int);
+static request_queue_t* i2ob_get_queue(kdev_t);
+static int i2ob_query_device(struct i2ob_device *, int, int, void*, int);
+static int do_i2ob_revalidate(kdev_t, int);
+static int i2ob_evt(void *);
+
+static int evt_pid = 0;
+static int evt_running = 0;
+static int scan_unit = 0;
+
+/*
+ * I2O OSM registration structure, filled positionally (order follows struct i2o_handler)
+ */
+static struct i2o_handler i2o_block_handler =
+{
+ i2o_block_reply, /* receives every reply addressed to this OSM */
+ i2ob_new_device,
+ i2ob_del_device,
+ i2ob_reboot_event,
+ "I2O Block OSM",
+ 0,
+ I2O_CLASS_RANDOM_BLOCK_STORAGE
+};
+
+/*
+ * Fetch a message frame from the controller that owns this device
+ */
+
+static u32 i2ob_get(struct i2ob_device *dev)
+{
+ /* Callers treat a result of 0xFFFFFFFF as "no frame available" */
+ return I2O_POST_READ32(dev->controller);
+}
+
+/*
+ * Turn a Linux block request into an I2O block read/write in message
+ * frame m and post it. base is added to the request's start sector.
+ */
+static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, u32 base, int unit)
+{
+ struct i2o_controller *c = dev->controller;
+ int tid = dev->tid;
+ unsigned long msg;
+ unsigned long mptr;
+ u64 offset;
+ struct request *req = ireq->req;
+ struct buffer_head *bh = req->bh;
+ int count = req->nr_sectors<<9;
+ char *last = NULL;
+ u32 size = 0; /* bytes in the current SG element; 16 bits could wrap */
+
+ // printk(KERN_INFO "i2ob_send called\n");
+ /* Map the message to a virtual address */
+ msg = c->mem_offset + m;
+
+ /*
+ * Build the message based on the request.
+ */
+ __raw_writel(i2ob_context|(unit<<8), msg+8);
+ __raw_writel(ireq->num, msg+12);
+ __raw_writel(req->nr_sectors << 9, msg+20);
+
+ /*
+ * Mask out partitions from now on
+ */
+ unit &= 0xF0;
+
+ /* This can be optimised later - just want to be sure its right for
+ starters */
+ offset = ((u64)(req->sector+base)) << 9;
+ __raw_writel( offset & 0xFFFFFFFF, msg+24);
+ __raw_writel(offset>>32, msg+28);
+ mptr=msg+32;
+
+ if(req->cmd == READ)
+ {
+ __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4);
+ while(bh!=NULL)
+ {
+ if(bh->b_data == last) { /* contiguous: grow the previous element */
+ size += bh->b_size;
+ last += bh->b_size;
+ if(bh->b_reqnext)
+ __raw_writel(0x10000000|(size), mptr-8); /* same flags as a fresh read element */
+ else
+ __raw_writel(0xD0000000|(size), mptr-8);
+ }
+ else
+ {
+ if(bh->b_reqnext)
+ __raw_writel(0x10000000|(bh->b_size), mptr);
+ else
+ __raw_writel(0xD0000000|(bh->b_size), mptr);
+ __raw_writel(virt_to_bus(bh->b_data), mptr+4);
+ mptr += 8;
+ size = bh->b_size;
+ last = bh->b_data + size;
+ }
+
+ count -= bh->b_size;
+ bh = bh->b_reqnext;
+ }
+ /*
+ * Heuristic for now since the block layer doesnt give
+ * us enough info. If its a big read assume sequential
+ * readahead on controller. If its small then don't read
+ * ahead but do use the controller cache.
+ */
+ if(size >= 8192)
+ __raw_writel((8<<24)|(1<<16)|8, msg+16);
+ else
+ __raw_writel((8<<24)|(1<<16)|4, msg+16);
+ }
+ else if(req->cmd == WRITE)
+ {
+ __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4);
+ while(bh!=NULL)
+ {
+ if(bh->b_data == last) { /* contiguous: grow the previous element */
+ size += bh->b_size;
+ last += bh->b_size;
+ if(bh->b_reqnext)
+ __raw_writel(0x14000000|(size), mptr-8);
+ else
+ __raw_writel(0xD4000000|(size), mptr-8);
+ }
+ else
+ {
+ if(bh->b_reqnext)
+ __raw_writel(0x14000000|(bh->b_size), mptr);
+ else
+ __raw_writel(0xD4000000|(bh->b_size), mptr);
+ __raw_writel(virt_to_bus(bh->b_data), mptr+4);
+ mptr += 8;
+ size = bh->b_size;
+ last = bh->b_data + size;
+ }
+
+ count -= bh->b_size;
+ bh = bh->b_reqnext;
+ }
+
+ if(c->battery)
+ {
+
+ if(size>16384)
+ __raw_writel(4, msg+16);
+ else
+ /*
+ * Allow replies to come back once data is cached in the controller
+ * This allows us to handle writes quickly thus giving more of the
+ * queue to reads.
+ */
+ __raw_writel(16, msg+16);
+ }
+ else
+ {
+ /* Large write, don't cache */
+ if(size>8192)
+ __raw_writel(4, msg+16);
+ else
+ /* write through */
+ __raw_writel(8, msg+16);
+ }
+ }
+ __raw_writel(I2O_MESSAGE_SIZE(mptr-msg)>>2 | SGL_OFFSET_8, msg);
+
+ if(count != 0) /* buffer sizes did not add up to nr_sectors<<9 */
+ {
+ printk(KERN_ERR "Request count botched by %d.\n", count);
+ }
+
+ i2o_post_message(c,m);
+ atomic_inc(&i2ob_queues[c->unit]->queue_depth);
+
+ return 0; /* always 0 */
+}
+
+/*
+ * Hand a finished request slot back to its IOP: the slot is pushed onto
+ * the head of that IOP's i2ob_qhead list, from which i2ob_request()
+ * takes slots. Callers in this file hold io_request_lock.
+ */
+static inline void i2ob_unhook_request(struct i2ob_request *ireq,
+ unsigned int iop)
+{
+ struct i2ob_iop_queue *iopq = i2ob_queues[iop];
+ ireq->next = iopq->i2ob_qhead;
+ iopq->i2ob_qhead = ireq;
+}
+
+/*
+ * Finish a block request: complete its buffers, then retire it.
+ */
+
+static inline void i2ob_end_request(struct request *req)
+{
+ /*
+ * end_that_request_first() is called repeatedly, with its second
+ * argument true only while req->errors is zero, until it returns 0.
+ */
+ for (;;) {
+ if (!end_that_request_first(req, !req->errors, "i2o block"))
+ break;
+ }
+ /*
+ * Nothing is left pending on the request, so retire it.
+ */
+ end_that_request_last(req);
+}
+
+/*
+ * Request merging functions
+ * i2ob_new_segment: count one more segment on req when both the device's
+ * max_segments and the caller's limit allow it. Returns 1 if the segment
+ * was counted, 0 if req is already at the limit.
+ */
+static inline int i2ob_new_segment(request_queue_t *q, struct request *req,
+ int __max_segments)
+{
+ int limit = i2ob_dev[MINOR(req->rq_dev)].max_segments;
+ if (limit > __max_segments)
+ limit = __max_segments;
+ if (req->nr_segments >= limit)
+ return 0;
+ req->nr_segments++;
+ return 1;
+}
+
+static int i2ob_back_merge(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int __max_segments)
+{
+ /* bh starts right where the tail buffer ends: no extra segment needed */
+ char *tail_end = req->bhtail->b_data + req->bhtail->b_size;
+ return (tail_end == bh->b_data) ? 1 : i2ob_new_segment(q, req, __max_segments);
+}
+
+static int i2ob_front_merge(request_queue_t *q, struct request *req,
+ struct buffer_head *bh, int __max_segments)
+{
+ /* bh ends right where the first buffer starts: no extra segment needed */
+ char *bh_end = bh->b_data + bh->b_size;
+ return (bh_end == req->bh->b_data) ? 1 : i2ob_new_segment(q, req, __max_segments);
+}
+
+static int i2ob_merge_requests(request_queue_t *q,
+ struct request *req,
+ struct request *next,
+ int __max_segments)
+{
+ /* Effective cap is the smaller of the device and caller limits */
+ int cap = i2ob_dev[MINOR(req->rq_dev)].max_segments;
+ int merged = req->nr_segments + next->nr_segments;
+ char *req_end = req->bhtail->b_data + req->bhtail->b_size;
+
+ if (cap > __max_segments)
+ cap = __max_segments;
+ /* Buffers that touch at the seam share a single segment */
+ if (req_end == next->bh->b_data)
+ merged--;
+ if (merged > cap)
+ return 0; /* too many segments: refuse the merge */
+ req->nr_segments = merged;
+ return 1;
+}
+
+static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit)
+{
+ unsigned long msg;
+ u32 m = i2ob_get(d);
+ /* Returns -1 when no frame could be obtained, 0 once the flush is posted */
+ if(m == 0xFFFFFFFF)
+ return -1;
+
+ msg = c->mem_offset + m;
+
+ /*
+ * Ask the controller to write the cache back. This sorts out
+ * the supertrak firmware flaw and also does roughly the right
+ * thing for other cases too. The reply is not waited for here;
+ * i2o_block_reply() handles I2O_CMD_BLOCK_CFLUSH replies.
+ */
+ __raw_writel(FIVE_WORD_MSG_SIZE|SGL_OFFSET_0, msg);
+ __raw_writel(I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|d->tid, msg+4);
+ __raw_writel(i2ob_context|(unit<<8), msg+8); /* unit lets the reply handler find the device */
+ __raw_writel(0, msg+12);
+ __raw_writel(60<<16, msg+16);
+
+ i2o_post_message(c,m);
+ return 0;
+}
+
+/*
+ * OSM reply handler. This gets all the message replies: failed frames,
+ * event notifications, cache-flush completions and block I/O completions.
+ */
+static void i2o_block_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
+{
+ unsigned long flags;
+ struct i2ob_request *ireq = NULL;
+ u8 st;
+ u32 *m = (u32 *)msg;
+ u8 unit = (m[2]>>8)&0xF0; /* low 4 bits are partition */
+ struct i2ob_device *dev = &i2ob_dev[(unit&0xF0)];
+
+ /*
+ * Pull the lock over ready
+ */
+
+ spin_lock_prefetch(&io_request_lock);
+
+ /*
+ * FAILed message
+ */
+ if(m[0] & (1<<13))
+ {
+ /*
+ * FAILed message from controller
+ * We increment the error count and abort it
+ *
+ * In theory this will never happen. The I2O block class
+ * specification states that block devices never return
+ * FAILs but instead use the REQ status field...but
+ * better be on the safe side since no one really follows
+ * the spec to the book :)
+ */
+ ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
+ ireq->req->errors++;
+ /* NOTE(review): queue_depth is not decremented on this path - confirm */
+ spin_lock_irqsave(&io_request_lock, flags);
+ i2ob_unhook_request(ireq, c->unit);
+ i2ob_end_request(ireq->req);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ /* Now flush the message by making it a NOP */
+ m[0]&=0x00FFFFFF;
+ m[0]|=(I2O_CMD_UTIL_NOP)<<24;
+ i2o_post_message(c,virt_to_bus(m));
+
+ return;
+ }
+
+ if(msg->function == I2O_CMD_UTIL_EVT_REGISTER)
+ {
+ spin_lock(&i2ob_evt_lock);
+ memcpy(evt_msg, msg, (m[0]>>16)<<2);
+ spin_unlock(&i2ob_evt_lock);
+ up(&i2ob_evt_sem);
+ return;
+ }
+
+ if(msg->function == I2O_CMD_BLOCK_CFLUSH)
+ {
+ spin_lock_irqsave(&io_request_lock, flags);
+ dev->constipated=0;
+ DEBUG(("unconstipated\n"));
+ if(i2ob_backlog_request(c, dev)==0)
+ i2ob_request(dev->req_queue);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+
+ if(!dev->i2odev)
+ {
+ /*
+ * This is HACK, but Intel Integrated RAID allows user
+ * to delete a volume that is claimed, locked, and in use
+ * by the OS. We have to check for a reply from a
+ * non-existent device and flag it as an error or the system
+ * goes kaput...
+ */
+ ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
+ ireq->req->errors++;
+ printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n");
+ spin_lock_irqsave(&io_request_lock, flags);
+ i2ob_unhook_request(ireq, c->unit);
+ i2ob_end_request(ireq->req);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+
+ /*
+ * Lets see what is cooking. We stuffed the
+ * request in the context.
+ */
+
+ ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
+ st=m[4]>>24;
+
+ if(st!=0)
+ {
+ int err;
+ static const char *bsa_errors[] = /* indexed by I2O_BSA_DSC_* code */
+ {
+ "Success",
+ "Media Error",
+ "Failure communicating to device",
+ "Device Failure",
+ "Device is not ready",
+ "Media not present",
+ "Media is locked by another user",
+ "Media has failed",
+ "Failure communicating to device",
+ "Device bus failure",
+ "Device is locked by another user",
+ "Device is write protected",
+ "Device has reset",
+ "Volume has changed, waiting for acknowledgement",
+ "Timeout"
+ };
+ err = m[4]&0xFFFF;
+
+ /*
+ * Device not ready means two things. One is that the
+ * the thing went offline (but not a removal media)
+ *
+ * The second is that you have a SuperTrak 100 and the
+ * firmware got constipated. Unlike standard i2o card
+ * setups the supertrak returns an error rather than
+ * blocking for the timeout in these cases.
+ */
+
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ if(err==4)
+ {
+ /*
+ * Time to uncork stuff
+ */
+
+ if(!dev->constipated)
+ {
+ dev->constipated = 1;
+ DEBUG(("constipated\n"));
+ /* Now pull the chain */
+ if(i2ob_flush(c, dev, unit)<0)
+ {
+ DEBUG(("i2ob: Unable to queue flush. Retrying I/O immediately.\n"));
+ dev->constipated=0;
+ }
+ DEBUG(("flushing\n"));
+ }
+
+ /*
+ * Recycle the request
+ */
+
+// i2ob_unhook_request(ireq, c->unit);
+
+ /*
+ * Place it on the recycle queue
+ */
+
+ ireq->next = NULL;
+ if(i2ob_backlog_tail[c->unit]!=NULL)
+ i2ob_backlog_tail[c->unit]->next = ireq;
+ else
+ i2ob_backlog[c->unit] = ireq;
+ i2ob_backlog_tail[c->unit] = ireq;
+
+ atomic_dec(&i2ob_queues[c->unit]->queue_depth);
+
+ /*
+ * If the constipator flush failed we want to
+ * poke the queue again.
+ */
+
+ i2ob_request(dev->req_queue);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+ /*
+ * and out
+ */
+
+ return;
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name,
+ err < (int)(sizeof(bsa_errors)/sizeof(bsa_errors[0])) ? bsa_errors[err] : "Unknown error");
+ if(m[4]&0x00FF0000)
+ printk(" - DDM attempted %d retries", (m[4]>>16)&0x00FF );
+ printk(".\n");
+ ireq->req->errors++;
+ }
+ else
+ ireq->req->errors = 0;
+
+ /*
+ * Dequeue the request. We use irqsave locks as one day we
+ * may be running polled controllers from a BH...
+ */
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ i2ob_unhook_request(ireq, c->unit);
+ i2ob_end_request(ireq->req);
+ atomic_dec(&i2ob_queues[c->unit]->queue_depth);
+
+ /*
+ * We may be able to do more I/O
+ */
+
+ if(i2ob_backlog_request(c, dev)==0)
+ i2ob_request(dev->req_queue);
+
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * Event handler. Needs to be a separate thread b/c we may have
+ * to do things like scan a partition table, or query parameters
+ * which cannot be done from an interrupt or from a bottom half.
+ */
+static int i2ob_evt(void *dummy)
+{
+ unsigned int evt;
+ unsigned long flags;
+ int unit;
+ int i;
+ //The only event that has data is the SCSI_SMART event.
+ struct i2o_reply {
+ u32 header[4];
+ u32 evt_indicator;
+ u8 ASC;
+ u8 ASCQ;
+ u8 data[16];
+ } evt_copy, *evt_local = &evt_copy; /* private snapshot of evt_msg */
+
+ lock_kernel();
+ daemonize();
+ unlock_kernel();
+
+ strcpy(current->comm, "i2oblock");
+ evt_running = 1;
+
+ while(1)
+ {
+ if(down_interruptible(&i2ob_evt_sem))
+ {
+ evt_running = 0;
+ printk("exiting...");
+ break;
+ }
+
+ /*
+ * Keep another CPU/interrupt from overwriting the
+ * message while we're reading it: copy it out under the lock.
+ *
+ * We stuffed the unit in the TxContext and grab the event mask
+ * None of the BSA we care about events have EventData
+ */
+ spin_lock_irqsave(&i2ob_evt_lock, flags);
+ memcpy(evt_local, evt_msg, sizeof(*evt_local));
+ spin_unlock_irqrestore(&i2ob_evt_lock, flags);
+
+ unit = evt_local->header[3];
+ evt = evt_local->evt_indicator;
+
+ switch(evt)
+ {
+ /*
+ * New volume loaded on same TID, so we just re-install.
+ * The TID/controller don't change as it is the same
+ * I2O device. It's just new media that we have to
+ * rescan.
+ */
+ case I2O_EVT_IND_BSA_VOLUME_LOAD:
+ {
+ i2ob_install_device(i2ob_dev[unit].i2odev->controller,
+ i2ob_dev[unit].i2odev, unit);
+ break;
+ }
+
+ /*
+ * No media, so set all parameters to 0 and set the media
+ * change flag. The I2O device is still valid, just doesn't
+ * have media, so we don't want to clear the controller or
+ * device pointer.
+ */
+ case I2O_EVT_IND_BSA_VOLUME_UNLOAD:
+ {
+ for(i = unit; i <= unit+15; i++)
+ {
+ i2ob_sizes[i] = 0;
+ i2ob_hardsizes[i] = 0;
+ i2ob_max_sectors[i] = 0;
+ i2ob[i].nr_sects = 0;
+ i2ob_gendisk.part[i].nr_sects = 0;
+ }
+ i2ob_media_change_flag[unit>>4] = 1; /* flag array has one entry per disk */
+ break;
+ }
+
+ case I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ:
+ printk(KERN_WARNING "%s: Attempt to eject locked media\n",
+ i2ob_dev[unit].i2odev->dev_name);
+ break;
+
+ /*
+ * The capacity has changed and we are going to be
+ * updating the max_sectors and other information
+ * about this disk. We try a revalidate first. If
+ * the block device is in use, we don't want to
+ * do that as there may be I/Os bound for the disk
+ * at the moment. In that case we read the size
+ * from the device and update the information ourselves
+ * and the user can later force a partition table
+ * update through an ioctl.
+ */
+ case I2O_EVT_IND_BSA_CAPACITY_CHANGE:
+ {
+ u64 size;
+
+ if(do_i2ob_revalidate(MKDEV(MAJOR_NR, unit),0) != -EBUSY)
+ continue;
+
+ if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 )
+ i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8);
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ i2ob_sizes[unit] = (int)(size>>10);
+ i2ob_gendisk.part[unit].nr_sects = size>>9;
+ i2ob[unit].nr_sects = (int)(size>>9);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ break;
+ }
+
+ /*
+ * We got a SCSI SMART event, we just log the relevant
+ * information and let the user decide what they want
+ * to do with the information.
+ */
+ case I2O_EVT_IND_BSA_SCSI_SMART:
+ {
+ char buf[17]; /* 16 data bytes plus a terminator */
+ printk(KERN_INFO "I2O Block: %s received a SCSI SMART Event\n",i2ob_dev[unit].i2odev->dev_name);
+ memcpy(buf, evt_local->data, 16);
+ buf[16]='\0';
+ printk(KERN_INFO " Disk Serial#:%s\n",buf);
+ printk(KERN_INFO " ASC 0x%02x \n",evt_local->ASC);
+ printk(KERN_INFO " ASCQ 0x%02x \n",evt_local->ASCQ);
+ break;
+ }
+
+ /*
+ * Non event
+ */
+
+ case 0:
+ break;
+
+ /*
+ * An event we didn't ask for. Call the card manufacturer
+ * and tell them to fix their firmware :)
+ */
+ default:
+ printk(KERN_INFO "%s: Received event %d we didn't register for\n"
+ KERN_INFO " Blame the I2O card manufacturer 8)\n",
+ i2ob_dev[unit].i2odev->dev_name, evt);
+ break;
+ }
+ };
+
+ complete_and_exit(&i2ob_thread_dead,0);
+ return 0;
+}
+
+/*
+ * The timer handler will attempt to restart requests
+ * that are queued to the driver. It is armed by i2ob_request()
+ * when no message frame could be obtained; q is the
+ * request_queue_t pointer that was stored in i2ob_timer.data.
+ */
+
+static void i2ob_timer_handler(unsigned long q)
+{
+ unsigned long flags;
+
+ /*
+ * We cannot touch the request queue or the timer
+ * flag without holding the io_request_lock.
+ */
+ spin_lock_irqsave(&io_request_lock,flags);
+
+ /*
+ * Clear the timer started flag so that
+ * i2ob_request() may queue the timer again.
+ */
+ i2ob_timer_started = 0;
+
+ /*
+ * Restart any requests still waiting on the queue.
+ */
+ i2ob_request((request_queue_t*)q);
+
+ /*
+ * Free the lock.
+ */
+ spin_unlock_irqrestore(&io_request_lock,flags);
+}
+
+static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *dev)
+{
+ u32 m;
+ struct i2ob_request *ireq;
+ /* Resubmit controller c's backlog; returns 1 if entries remain, else 0 */
+ while((ireq=i2ob_backlog[c->unit])!=NULL)
+ {
+ int unit;
+ /* Only refill while the IOP holds at most a quarter of dev's depth */
+ if(atomic_read(&i2ob_queues[c->unit]->queue_depth) > dev->depth/4)
+ break;
+ /* Stop if no message frame can be obtained right now */
+ m = i2ob_get(dev);
+ if(m == 0xFFFFFFFF)
+ break;
+ /* Unlink the head entry, resetting the tail when the list empties */
+ i2ob_backlog[c->unit] = ireq->next;
+ if(i2ob_backlog[c->unit] == NULL)
+ i2ob_backlog_tail[c->unit] = NULL;
+ /* NOTE(review): sent via dev, not the device of ireq's own minor - confirm */
+ unit = MINOR(ireq->req->rq_dev);
+ i2ob_send(m, dev, ireq, i2ob[unit].start_sect, unit);
+ }
+ if(i2ob_backlog[c->unit])
+ return 1;
+ return 0;
+}
+
+/*
+ * The I2O block driver is listed as one of those that pulls the
+ * front entry off the queue before processing it. This is important
+ * to remember here. If we drop the io lock then CURRENT will change
+ * on us. We must unlink CURRENT in this routine before we return, if
+ * we use it.
+ */
+
+static void i2ob_request(request_queue_t *q)
+{
+ struct request *req;
+ struct i2ob_request *ireq;
+ int unit;
+ struct i2ob_device *dev;
+ u32 m;
+
+
+ while (!list_empty(&q->queue_head)) {
+ /*
+ * On an IRQ completion if there is an inactive
+ * request on the queue head it means it isnt yet
+ * ready to dispatch.
+ */
+ req = blkdev_entry_next_request(&q->queue_head);
+
+ if(req->rq_status == RQ_INACTIVE)
+ return;
+
+ unit = MINOR(req->rq_dev);
+ dev = &i2ob_dev[(unit&0xF0)];
+
+ /*
+ * Queue depths probably belong with some kind of
+ * generic IOP commit control. Certainly its not right
+ * its global!
+ */
+ if(atomic_read(&i2ob_queues[dev->unit]->queue_depth) >= dev->depth)
+ break;
+
+ /*
+ * Is the channel constipated ?
+ */
+
+ if(i2ob_backlog[dev->unit]!=NULL)
+ break;
+
+ /* Get a message */
+ m = i2ob_get(dev);
+
+ if(m==0xFFFFFFFF)
+ {
+ /*
+ * No frame. Arm the retry timer unless it is already
+ * queued; either way stop here with req still queued.
+ */
+ if (!i2ob_timer_started)
+ {
+ DEBUG((KERN_ERR "i2ob: starting timer\n"));
+
+ /*
+ * Set the timer_started flag to insure
+ * that the timer is only queued once.
+ * Queing it more than once will corrupt
+ * the timer queue.
+ */
+ i2ob_timer_started = 1;
+
+ /*
+ * Set up the timer to expire in
+ * 500ms.
+ */
+ i2ob_timer.expires = jiffies + (HZ >> 1);
+ i2ob_timer.data = (unsigned long)q;
+
+ /*
+ * Start it.
+ */
+
+ add_timer(&i2ob_timer);
+ }
+ return;
+ }
+
+ /*
+ * Everything ok, so pull from kernel queue onto our queue
+ */
+ req->errors = 0;
+ blkdev_dequeue_request(req);
+ req->waiting = NULL;
+
+ ireq = i2ob_queues[dev->unit]->i2ob_qhead;
+ i2ob_queues[dev->unit]->i2ob_qhead = ireq->next;
+ ireq->req = req;
+
+ i2ob_send(m, dev, ireq, i2ob[unit].start_sect, (unit&0xF0));
+ }
+}
+
+
+/*
+ * SCSI-CAM for ioctl geometry mapping
+ * Duplicated with SCSI - this should be moved into somewhere common
+ * perhaps genhd ?
+ *
+ * LBA -> CHS mapping table taken from:
+ *
+ * "Incorporating the I2O Architecture into BIOS for Intel Architecture
+ * Platforms"
+ *
+ * This is an I2O document that is only available to I2O members,
+ * not developers.
+ *
+ * From my understanding, this is how all the I2O cards do this
+ *
+ * Disk Size | Sectors | Heads | Cylinders
+ * ---------------+---------+-------+-------------------
+ * 1 < X <= 528M | 63 | 16 | X/(63 * 16 * 512)
+ * 528M < X <= 1G | 63 | 32 | X/(63 * 32 * 512)
+ * 1G < X <= 2.1G | 63 | 64 | X/(63 * 64 * 512)
+ * 2.1G < X <= 4.2G | 63 | 128 | X/(63 * 128 * 512)
+ * 4.2G < X | 63 | 255 | X/(63 * 255 * 512)
+ */
+#define BLOCK_SIZE_528M 1081344
+#define BLOCK_SIZE_1G 2097152
+#define BLOCK_SIZE_21G 4403200
+#define BLOCK_SIZE_42G 8806400
+#define BLOCK_SIZE_84G 17612800
+
+static void i2o_block_biosparam(
+ unsigned long capacity,
+ unsigned short *cyls,
+ unsigned char *hds,
+ unsigned char *secs)
+{
+ /* Derive a CHS geometry from a capacity given in sectors */
+ const unsigned long spt = 63L; /* Maximize sectors per track */
+ unsigned long nheads = 255; /* anything above BLOCK_SIZE_42G */
+ unsigned long ncyls;
+
+ /* Head count grows with the size class of the disk */
+ if (capacity <= BLOCK_SIZE_528M)
+ nheads = 16;
+ else if (capacity <= BLOCK_SIZE_1G)
+ nheads = 32;
+ else if (capacity <= BLOCK_SIZE_21G)
+ nheads = 64;
+ else if (capacity <= BLOCK_SIZE_42G)
+ nheads = 128;
+ ncyls = capacity / (nheads * spt);
+
+ *cyls = (unsigned short) ncyls; /* Stuff return values */
+ *secs = (unsigned char) spt;
+ *hds = (unsigned char) nheads;
+}
+
+
+/*
+ * Rescan the partition tables of the disk that owns dev. Returns -EBUSY
+ * when the disk's refcnt is already above maxu, otherwise 0.
+ */
+static int do_i2ob_revalidate(kdev_t dev, int maxu)
+{
+ int minor=MINOR(dev);
+ int i;
+
+ minor&=0xF0; /* first minor of the disk */
+
+ i2ob_dev[minor].refcnt++; /* hold a reference of our own while rescanning */
+ if(i2ob_dev[minor].refcnt>maxu+1)
+ {
+ i2ob_dev[minor].refcnt--;
+ return -EBUSY;
+ }
+
+ for( i = 15; i>=0 ; i--) /* highest minor first, the base minor last */
+ {
+ int m = minor+i;
+ invalidate_device(MKDEV(MAJOR_NR, m), 1);
+ i2ob_gendisk.part[m].start_sect = 0;
+ i2ob_gendisk.part[m].nr_sects = 0;
+ }
+
+ /*
+ * Do a physical check and then reconfigure.
+ * The result of i2ob_install_device() is not checked.
+ */
+ i2ob_install_device(i2ob_dev[minor].controller, i2ob_dev[minor].i2odev,
+ minor);
+ i2ob_dev[minor].refcnt--;
+ return 0;
+}
+
+/*
+ * Issue device specific ioctl calls.
+ */
+
+static int i2ob_ioctl(struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct i2ob_device *dev;
+ int minor;
+
+ /* Anyone capable of this syscall can do *real bad* things */
+
+ if (!capable(CAP_SYS_ADMIN)) /* applies to every command, including size queries */
+ return -EPERM;
+ if (!inode)
+ return -EINVAL;
+ minor = MINOR(inode->i_rdev);
+ if (minor >= (MAX_I2OB<<4))
+ return -ENODEV;
+
+ dev = &i2ob_dev[minor]; /* not used further in this function */
+ switch (cmd) {
+ case BLKGETSIZE: /* size of this minor in sectors */
+ return put_user(i2ob[minor].nr_sects, (long *) arg);
+ case BLKGETSIZE64: /* size of this minor in bytes */
+ return put_user((u64)i2ob[minor].nr_sects << 9, (u64 *)arg);
+
+ case HDIO_GETGEO:
+ {
+ struct hd_geometry g;
+ int u=minor&0xF0; /* geometry comes from the whole disk */
+ i2o_block_biosparam(i2ob_sizes[u]<<1,
+ &g.cylinders, &g.heads, &g.sectors);
+ g.start = i2ob[minor].start_sect;
+ return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0;
+ }
+
+ case BLKRRPART:
+ if(!capable(CAP_SYS_ADMIN)) /* already checked on entry */
+ return -EACCES;
+ return do_i2ob_revalidate(inode->i_rdev,1);
+
+ case BLKFLSBUF:
+ case BLKROSET:
+ case BLKROGET:
+ case BLKRASET:
+ case BLKRAGET:
+ case BLKPG: /* all of the above are passed to blk_ioctl() */
+ return blk_ioctl(inode->i_rdev, cmd, arg);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Close the block device down
+ */
+
+static int i2ob_release(struct inode *inode, struct file *file)
+{
+ struct i2ob_device *dev;
+ int minor;
+
+ minor = MINOR(inode->i_rdev);
+ if (minor >= (MAX_I2OB<<4))
+ return -ENODEV;
+ dev = &i2ob_dev[(minor&0xF0)];
+
+ /*
+ * This is to deal with the case of an application
+ * opening a device and then the device disappears while
+ * it's in use, and then the application tries to release
+ * it. ex: Unmounting a deleted RAID volume at reboot.
+ * If we send messages, it will just cause FAILs since
+ * the TID no longer exists. NOTE(review): this return skips MOD_DEC_USE_COUNT - confirm.
+ */
+ if(!dev->i2odev)
+ return 0;
+
+ /* Sync the device so we don't get errors */
+ fsync_dev(inode->i_rdev);
+
+ if (dev->refcnt <= 0)
+ printk(KERN_ALERT "i2ob_release: refcount(%d) <= 0\n", dev->refcnt);
+ dev->refcnt--;
+ if(dev->refcnt==0) /* last close: flush, unlock, then unclaim */
+ {
+ /*
+ * Flush the onboard cache on unmount
+ */
+ u32 msg[5];
+ int *query_done = &dev->done_flag;
+ msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+ msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
+ msg[2] = i2ob_context|0x40000000;
+ msg[3] = (u32)query_done;
+ msg[4] = 60<<16;
+ DEBUG("Flushing...");
+ i2o_post_wait(dev->controller, msg, 20, 60); /* result is ignored */
+
+ /*
+ * Unlock the media
+ */
+ msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+ msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
+ msg[2] = i2ob_context|0x40000000;
+ msg[3] = (u32)query_done;
+ msg[4] = -1;
+ DEBUG("Unlocking...");
+ i2o_post_wait(dev->controller, msg, 20, 2); /* result is ignored */
+ DEBUG("Unlocked.\n");
+
+ /*
+ * Now unclaim the device.
+ */
+
+ if (i2o_release_device(dev->i2odev, &i2o_block_handler))
+ printk(KERN_ERR "i2ob_release: controller rejected unclaim.\n");
+
+ DEBUG("Unclaim\n");
+ }
+ MOD_DEC_USE_COUNT;
+ return 0;
+}
+
+/*
+ * Open the block device.
+ */
+
+static int i2ob_open(struct inode *inode, struct file *file)
+{
+ int minor;
+ struct i2ob_device *dev;
+
+ if (!inode)
+ return -EINVAL;
+ minor = MINOR(inode->i_rdev);
+ if (minor >= MAX_I2OB<<4)
+ return -ENODEV;
+ dev=&i2ob_dev[(minor&0xF0)]; /* all 16 minors of a disk share one descriptor */
+
+ if(!dev->i2odev) /* no device installed at this unit */
+ return -ENODEV;
+
+ if(dev->refcnt++==0) /* first opener claims, mounts and locks */
+ {
+ u32 msg[6];
+
+ DEBUG("Claim ");
+ if(i2o_claim_device(dev->i2odev, &i2o_block_handler))
+ {
+ dev->refcnt--;
+ printk(KERN_INFO "I2O Block: Could not open device\n");
+ return -EBUSY;
+ }
+ DEBUG("Claimed ");
+
+ /*
+ * Mount the media if needed. Note that we don't use
+ * the lock bit. Since we have to issue a lock if it
+ * refuses a mount (quite possible) then we might as
+ * well just send two messages out.
+ */
+ msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+ msg[1] = I2O_CMD_BLOCK_MMOUNT<<24|HOST_TID<<12|dev->tid;
+ msg[4] = -1;
+ msg[5] = 0;
+ DEBUG("Mount ");
+ i2o_post_wait(dev->controller, msg, 24, 2); /* msg[2], msg[3] are not set here; result ignored */
+
+ /*
+ * Lock the media
+ */
+ msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+ msg[1] = I2O_CMD_BLOCK_MLOCK<<24|HOST_TID<<12|dev->tid;
+ msg[4] = -1;
+ DEBUG("Lock ");
+ i2o_post_wait(dev->controller, msg, 20, 2); /* result ignored */
+ DEBUG("Ready.\n");
+ }
+ MOD_INC_USE_COUNT;
+ return 0;
+}
+
+/*
+ * Read one scalar parameter (table/field) from the device's TID via
+ * i2o_query_scalar() and hand its return value straight back.
+ */
+static int i2ob_query_device(struct i2ob_device *dev, int table,
+ int field, void *buf, int buflen)
+{
+ struct i2o_controller *iop = dev->controller;
+ return i2o_query_scalar(iop, dev->tid, table, field, buf, buflen);
+}
+
+
+/*
+ *	Install the I2O block device we found.
+ *
+ *	c	controller (IOP) the device sits behind
+ *	d	the I2O device being installed
+ *	unit	first minor of the 16-minor slot given to the device
+ *
+ *	The caller must already have filled in the controller and tid of
+ *	i2ob_dev[unit], because the parameter queries below go through
+ *	them.  The function fills in the size, sector-size and request
+ *	limit tables for the slot, makes sure the per-IOP queue exists,
+ *	reads the partition table and registers for device events.
+ *
+ *	Returns 0 on success, 1 if the media is not loaded or the IOP
+ *	queue could not be initialised.
+ */
+
+static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, int unit)
+{
+	/*
+	 * Everything fetched with i2ob_query_device() starts out as zero,
+	 * so a query that fails without filling its buffer cannot leave
+	 * stack garbage in the size tables or in the log output.
+	 */
+	u64 size = 0;
+	u32 blocksize = 0;
+	u32 limit;
+	u8 type = 0;
+	u32 flags = 0, status = 0;
+	struct i2ob_device *dev=&i2ob_dev[unit];
+	int i;
+
+	/*
+	 * For logging purposes...
+	 */
+	printk(KERN_INFO "i2ob: Installing tid %d device at unit %d\n",
+		d->lct_data.tid, unit);
+
+	/*
+	 *	Ask for the current media data. If that isn't supported
+	 *	then we ask for the device capacity data
+	 */
+	if(i2ob_query_device(dev, 0x0004, 1, &blocksize, 4) != 0
+	  || i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
+	{
+		i2ob_query_device(dev, 0x0000, 3, &blocksize, 4);
+		i2ob_query_device(dev, 0x0000, 4, &size, 8);
+	}
+
+	i2ob_query_device(dev, 0x0000, 5, &flags, 4);
+	i2ob_query_device(dev, 0x0000, 6, &status, 4);
+
+	/* size is in bytes: >>10 gives 1K blocks, >>9 gives 512 byte sectors */
+	i2ob_sizes[unit] = (int)(size>>10);
+	for(i=unit; i <= unit+15 ; i++)
+		i2ob_hardsizes[i] = blocksize;
+	i2ob_gendisk.part[unit].nr_sects = size>>9;
+	i2ob[unit].nr_sects = (int)(size>>9);
+
+	/* Set limit based on inbound frame size */
+	limit = (d->controller->status_block->inbound_frame_size - 8)/2;
+	limit = limit<<9;
+
+	/*
+	 *	Max number of Scatter-Gather Elements, applied to all
+	 *	16 minors of the slot.
+	 */
+
+	for(i=unit;i<=unit+15;i++)
+	{
+		if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy)
+		{
+			i2ob_max_sectors[i] = 32;
+			i2ob_dev[i].max_segments = 8;
+			i2ob_dev[i].depth = 4;
+		}
+		else if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req)
+		{
+			i2ob_max_sectors[i] = 8;
+			i2ob_dev[i].max_segments = 8;
+		}
+		else
+		{
+			/* MAX_SECTORS was used but 255 is a dumb number for
+			   striped RAID */
+			i2ob_max_sectors[i]=256;
+			i2ob_dev[i].max_segments = (d->controller->status_block->inbound_frame_size - 8)/2;
+		}
+	}
+
+	printk(KERN_INFO "Max segments set to %d\n",
+		i2ob_dev[unit].max_segments);
+	printk(KERN_INFO "Byte limit is %d.\n", limit);
+
+	i2ob_query_device(dev, 0x0000, 0, &type, 1);
+
+	sprintf(d->dev_name, "%s%c", i2ob_gendisk.major_name, 'a' + (unit>>4));
+
+	printk(KERN_INFO "%s: ", d->dev_name);
+	switch(type)
+	{
+		case 0: printk("Disk Storage");break;
+		case 4: printk("WORM");break;
+		case 5: printk("CD-ROM");break;
+		case 7: printk("Optical device");break;
+		default:
+			printk("Type %d", type);
+			break;
+	}
+	if(status&(1<<10))
+		printk("(RAID)");
+
+	/*
+	 * Bits 3 and 4 are tested pairwise: a bit set in flags but clear
+	 * in status is reported as media not being loaded.
+	 */
+	if(((flags & (1<<3)) && !(status & (1<<3))) ||
+	   ((flags & (1<<4)) && !(status & (1<<4))))
+	{
+		printk(KERN_INFO " Not loaded.\n");
+		return 1;
+	}
+	printk("- %dMb, %d byte sectors",
+		(int)(size>>20), blocksize);
+	if(status&(1<<0))
+	{
+		u32 cachesize = 0;
+		i2ob_query_device(dev, 0x0003, 0, &cachesize, 4);
+		cachesize>>=10;
+		if(cachesize>4095)
+			printk(", %dMb cache", cachesize>>10);
+		else
+			printk(", %dKb cache", cachesize);
+
+	}
+	printk(".\n");
+	printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n",
+		d->dev_name, i2ob_max_sectors[unit]);
+
+	/*
+	 * If this is the first I2O block device found on this IOP,
+	 * we need to initialize all the queue data structures
+	 * before any I/O can be performed. If it fails, this
+	 * device is useless.
+	 */
+	if(!i2ob_queues[c->unit]) {
+		if(i2ob_init_iop(c->unit))
+			return 1;
+	}
+
+	/*
+	 * This will save one level of lookup/indirection in critical
+	 * code so that we can directly get the queue ptr from the
+	 * device instead of having to go the IOP data structure.
+	 */
+	dev->req_queue = &i2ob_queues[c->unit]->req_queue;
+
+	grok_partitions(&i2ob_gendisk, unit>>4, 1<<4, (long)(size>>9));
+
+	/*
+	 * Register for the events we're interested in and that the
+	 * device actually supports.
+	 */
+	i2o_event_register(c, d->lct_data.tid, i2ob_context, unit,
+		(I2OB_EVENT_MASK & d->lct_data.event_capabilities));
+
+	return 0;
+}
+
+/*
+ *	Initialize IOP specific queue structures. This is called
+ *	once for each IOP that has a block device sitting behind it.
+ *
+ *	Allocates the i2ob_iop_queue for controller number 'unit',
+ *	chains its request slots into a free list headed by i2ob_qhead,
+ *	zeroes the queue depth and sets up the block layer request queue
+ *	with the i2ob merge functions.
+ *
+ *	Returns 0 on success, -1 if the allocation fails.
+ */
+static int i2ob_init_iop(unsigned int unit)
+{
+	struct i2ob_iop_queue *iop;
+	int slot;
+
+	iop = (struct i2ob_iop_queue*)
+		kmalloc(sizeof(struct i2ob_iop_queue), GFP_ATOMIC);
+	i2ob_queues[unit] = iop;
+	if (iop == NULL)
+	{
+		printk(KERN_WARNING
+			"Could not allocate request queue for I2O block device!\n");
+		return -1;
+	}
+
+	/* Link each request slot to its successor and number it */
+	for (slot = 0; slot < MAX_I2OB_DEPTH; slot++)
+	{
+		iop->request_queue[slot].next = &iop->request_queue[slot+1];
+		iop->request_queue[slot].num = slot;
+	}
+
+	/* Queue is MAX_I2OB + 1... the extra slot terminates the list */
+	iop->request_queue[slot].next = NULL;
+	iop->i2ob_qhead = &iop->request_queue[0];
+	atomic_set(&iop->queue_depth, 0);
+
+	blk_init_queue(&iop->req_queue, i2ob_request);
+	blk_queue_headactive(&iop->req_queue, 0);
+	iop->req_queue.back_merge_fn = i2ob_back_merge;
+	iop->req_queue.front_merge_fn = i2ob_front_merge;
+	iop->req_queue.merge_requests_fn = i2ob_merge_requests;
+	/* Note: this stores the address of the i2ob_queues[] entry */
+	iop->req_queue.queuedata = &i2ob_queues[unit];
+
+	return 0;
+}
+
+/*
+ *	Get the request queue for the given device.
+ *
+ *	The partition bits of the minor are masked off so that all 16
+ *	minors of a slot share the queue stored in the slot's first
+ *	i2ob_dev[] entry.
+ */
+static request_queue_t* i2ob_get_queue(kdev_t dev)
+{
+	struct i2ob_device *slot = &i2ob_dev[MINOR(dev) & 0xF0];
+
+	return slot->req_queue;
+}
+
+/*
+ *	Probe the I2O subsystem for block class devices.
+ *
+ *	bios	non-zero: only consider devices whose LCT bios_info is
+ *		0x80 (the boot device pass);
+ *		zero: consider every other device.
+ *
+ *	Each matching, unowned (user_tid 0xFFF) random block storage
+ *	device is claimed, installed at the next free slot (scan_unit)
+ *	and released again.  Slots advance by 16 minors per device.
+ */
+static void i2ob_scan(int bios)
+{
+	int i;
+	int warned = 0;
+
+	struct i2o_device *d, *b=NULL;
+	struct i2o_controller *c;
+	struct i2ob_device *dev;
+
+	for(i=0; i< MAX_I2O_CONTROLLERS; i++)
+	{
+		c=i2o_find_controller(i);
+
+		if(c==NULL)
+			continue;
+
+		/*
+		 * The device list connected to the I2O Controller is doubly
+		 * linked. Here we walk to the end of the list and start
+		 * claiming devices from that end. This ensures that, within
+		 * one I2O controller at least, newly created volumes get
+		 * claimed after the older ones and so map to the same
+		 * major/minor (and hence device file name) after every
+		 * reboot.
+		 * The exceptions being:
+		 * 1. If there was a TID reuse.
+		 * 2. There was more than one I2O controller.
+		 */
+
+		if(!bios)
+		{
+			for (d=c->devices;d!=NULL;d=d->next)
+				if(d->next == NULL)
+					b = d;
+		}
+		else
+			b = c->devices;
+
+		while(b != NULL)
+		{
+			/* Step first so that 'continue' is safe below */
+			d=b;
+			if(bios)
+				b = b->next;
+			else
+				b = b->prev;
+
+			if(d->lct_data.class_id!=I2O_CLASS_RANDOM_BLOCK_STORAGE)
+				continue;
+
+			if(d->lct_data.user_tid != 0xFFF)
+				continue;
+
+			if(bios)
+			{
+				if(d->lct_data.bios_info != 0x80)
+					continue;
+				printk(KERN_INFO "Claiming as Boot device: Controller %d, TID %d\n", c->unit, d->lct_data.tid);
+			}
+			else
+			{
+				if(d->lct_data.bios_info == 0x80)
+					continue; /*Already claimed on pass 1 */
+			}
+
+			if(i2o_claim_device(d, &i2o_block_handler))
+			{
+				printk(KERN_WARNING "i2o_block: Controller %d, TID %d\n", c->unit,
+					d->lct_data.tid);
+				printk(KERN_WARNING "\t%sevice refused claim! Skipping installation\n", bios?"Boot d":"D");
+				continue;
+			}
+
+			if(scan_unit<MAX_I2OB<<4)
+			{
+				/*
+				 * Get the device and fill in the
+				 * Tid and controller.
+				 */
+				dev=&i2ob_dev[scan_unit];
+				dev->i2odev = d;
+				dev->controller = c;
+				dev->unit = c->unit;
+				dev->tid = d->lct_data.tid;
+
+				if(i2ob_install_device(c,d,scan_unit))
+				{
+					printk(KERN_WARNING "Could not install I2O block device\n");
+					/*
+					 * The slot is not consumed, so it must
+					 * not keep pointing at this device:
+					 * a non-NULL i2odev marks the slot as
+					 * in use for open, hot-add and module
+					 * cleanup.
+					 */
+					dev->i2odev = NULL;
+				}
+				else
+				{
+					scan_unit+=16;
+					i2ob_dev_count++;
+
+					/* We want to know when device goes away */
+					i2o_device_notify_on(d, &i2o_block_handler);
+				}
+			}
+			else
+			{
+				if(!warned++)
+					printk(KERN_WARNING "i2o_block: too many device, registering only %d.\n", scan_unit>>4);
+			}
+			i2o_release_device(d, &i2o_block_handler);
+		}
+		i2o_unlock_controller(c);
+	}
+}
+
+/*
+ *	Scan all controllers in two passes: boot device first, then
+ *	everything else.
+ */
+static void i2ob_probe(void)
+{
+	/*
+	 * Pass 1: boot volume.
+	 *
+	 * There is some overhead/redundancy in scanning twice, but it
+	 * lets the first boot volume encountered be claimed as
+	 * /dev/i2o/hda every time.  If an I2O block device was booted
+	 * from, the BIOS sets its bios_info field to 0x80, and that is
+	 * what the scan looks for.  Having the bootable volume always
+	 * at /dev/i2o/hda avoids a kernel panic when mounting the
+	 * root partition.
+	 */
+	printk(KERN_INFO "i2o_block: Checking for Boot device...\n");
+	i2ob_scan(1);
+
+	/*
+	 * Pass 2: the remainder.
+	 */
+	printk(KERN_INFO "i2o_block: Checking for I2O Block devices...\n");
+	i2ob_scan(0);
+}
+
+
+/*
+ *	New device notification handler. Called whenever a new
+ *	I2O block storage device is added to the system.
+ *
+ *	c	controller the device appeared on
+ *	d	the new I2O device
+ *
+ *	Picks the first slot in i2ob_dev[] without a device, claims the
+ *	device, installs it into that slot and releases the claim again.
+ *	Nothing is installed if no slot is free or the claim is refused.
+ *
+ *	Should we spin lock around this to keep multiple devs from
+ *	getting updated at the same time?
+ *
+ */
+void i2ob_new_device(struct i2o_controller *c, struct i2o_device *d)
+{
+	struct i2ob_device *dev;
+	int unit = 0;
+
+	printk(KERN_INFO "i2o_block: New device detected\n");
+	printk(KERN_INFO "   Controller %d Tid %d\n",c->unit, d->lct_data.tid);
+
+	/*
+	 * Check for available space by looking for a free slot. Slots
+	 * are 16 minors apart. The search result is what decides: if
+	 * the loop runs off the end, unit is MAX_I2OB<<4 and must not
+	 * be used as an index into i2ob_dev[].
+	 */
+	for(unit = 0; unit < (MAX_I2OB<<4); unit += 16)
+	{
+		if(!i2ob_dev[unit].i2odev)
+			break;
+	}
+	if(unit >= (MAX_I2OB<<4))
+	{
+		printk(KERN_ERR "i2o_block: No more devices allowed!\n");
+		return;
+	}
+
+	if(i2o_claim_device(d, &i2o_block_handler))
+	{
+		printk(KERN_INFO
+			"i2o_block: Unable to claim device. Installation aborted\n");
+		return;
+	}
+
+	/* Fill in the same fields the boot-time scan fills in */
+	dev = &i2ob_dev[unit];
+	dev->i2odev = d;
+	dev->controller = c;
+	dev->unit = c->unit;
+	dev->tid = d->lct_data.tid;
+
+	if(i2ob_install_device(c,d,unit))
+	{
+		printk(KERN_ERR "i2o_block: Could not install new device\n");
+		/* Give the slot back: a non-NULL i2odev marks it in use */
+		dev->i2odev = NULL;
+	}
+	else
+	{
+		i2ob_dev_count++;
+		i2o_device_notify_on(d, &i2o_block_handler);
+	}
+
+	i2o_release_device(d, &i2o_block_handler);
+
+	return;
+}
+
+/*
+ *	Deleted device notification handler. Called when a device we
+ *	are talking to has been deleted by the user or some other
+ *	mysterious force outside the kernel.
+ *
+ *	c	controller the device was on
+ *	d	the I2O device that went away
+ *
+ *	Finds the slot bound to d, detaches its request queue, zeroes the
+ *	size tables of all 16 minors, drops every module reference held
+ *	through opens of the slot and flags a media change.
+ */
+void i2ob_del_device(struct i2o_controller *c, struct i2o_device *d)
+{
+	int unit = 0;
+	int i = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&io_request_lock, flags);
+
+	/*
+	 * Need to do this...we sometimes get two events from the IRTOS
+	 * in a row and that causes lots of problems.
+	 */
+	i2o_device_notify_off(d, &i2o_block_handler);
+
+	printk(KERN_INFO "I2O Block Device Deleted\n");
+
+	for(unit = 0; unit < MAX_I2OB<<4; unit += 16)
+	{
+		if(i2ob_dev[unit].i2odev == d)
+		{
+			printk(KERN_INFO "  /dev/%s: Controller %d Tid %d\n",
+				d->dev_name, c->unit, d->lct_data.tid);
+			break;
+		}
+	}
+	if(unit >= MAX_I2OB<<4)
+	{
+		printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n");
+		spin_unlock_irqrestore(&io_request_lock, flags);
+		return;
+	}
+
+	/*
+	 * This will force errors when i2ob_get_queue() is called
+	 * by the kernel.
+	 */
+	i2ob_dev[unit].req_queue = NULL;
+	for(i = unit; i <= unit+15; i++)
+	{
+		i2ob_dev[i].i2odev = NULL;
+		i2ob_sizes[i] = 0;
+		i2ob_hardsizes[i] = 0;
+		i2ob_max_sectors[i] = 0;
+		i2ob[i].nr_sects = 0;
+		i2ob_gendisk.part[i].nr_sects = 0;
+	}
+	spin_unlock_irqrestore(&io_request_lock, flags);
+
+	/*
+	 * Decrease usage count for module
+	 */
+
+	while(i2ob_dev[unit].refcnt--)
+		MOD_DEC_USE_COUNT;
+
+	i2ob_dev[unit].refcnt = 0;
+
+	/*
+	 * Index with unit, not i: after the loop above i is unit+16,
+	 * which is the next device's slot or one past the table.
+	 */
+	i2ob_dev[unit].tid = 0;
+
+	/*
+	 * Do we need this?
+	 * The media didn't really change...the device is just gone
+	 *
+	 * The flag is kept per device, so it is indexed by unit>>4,
+	 * the same index i2ob_media_change() reads.
+	 */
+	i2ob_media_change_flag[unit>>4] = 1;
+
+	i2ob_dev_count--;
+}
+
+/*
+ *	Have we seen a media change ?
+ *
+ *	Reports and clears the media change flag of the device that
+ *	owns the given minor.  Returns 1 if the flag was set, else 0.
+ */
+static int i2ob_media_change(kdev_t dev)
+{
+	int disk = MINOR(dev) >> 4;
+
+	if (!i2ob_media_change_flag[disk])
+		return 0;
+
+	i2ob_media_change_flag[disk] = 0;
+	return 1;
+}
+
+/*
+ *	Revalidate entry point for the block layer: forwards to
+ *	do_i2ob_revalidate() with its second argument set to 0.
+ */
+static int i2ob_revalidate(kdev_t dev)
+{
+	int rc = do_i2ob_revalidate(dev, 0);
+
+	return rc;
+}
+
+/*
+ *	Reboot notifier. This is called by i2o_core when the system
+ *	shuts down.
+ *
+ *	For every device slot that is still open, post a cache flush
+ *	followed by a media unlock.  The results of the posts are not
+ *	checked.
+ */
+static void i2ob_reboot_event(void)
+{
+	int slot;
+
+	for (slot = 0; slot < MAX_I2OB; slot++)
+	{
+		struct i2ob_device *dev = &i2ob_dev[slot << 4];
+		int *query_done;
+		u32 msg[5];
+
+		/* Nothing to do for slots nobody has open */
+		if (dev->refcnt == 0)
+			continue;
+
+		query_done = &dev->done_flag;
+
+		/*
+		 *	Flush the onboard cache
+		 */
+		msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+		msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
+		msg[2] = i2ob_context|0x40000000;
+		msg[3] = (u32)query_done;
+		msg[4] = 60<<16;
+
+		DEBUG("Flushing...");
+		i2o_post_wait(dev->controller, msg, 20, 60);
+
+		DEBUG("Unlocking...");
+		/*
+		 *	Unlock the media
+		 */
+		msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
+		msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
+		msg[2] = i2ob_context|0x40000000;
+		msg[3] = (u32)query_done;
+		msg[4] = -1;
+		i2o_post_wait(dev->controller, msg, 20, 2);
+
+		DEBUG("Unlocked.\n");
+	}
+}
+
+/*
+ *	Block device entry points of the I2O block OSM.
+ */
+static struct block_device_operations i2ob_fops =
+{
+	.open			= i2ob_open,
+	.release		= i2ob_release,
+	.ioctl			= i2ob_ioctl,
+	.check_media_change	= i2ob_media_change,
+	.revalidate		= i2ob_revalidate,
+};
+
+/*
+ *	Generic disk description: MAX_I2OB disks named i2o/hd<x>,
+ *	each spanning 1<<4 minors.
+ */
+static struct gendisk i2ob_gendisk =
+{
+	.major		= MAJOR_NR,
+	.major_name	= "i2o/hd",
+	.minor_shift	= 4,
+	.max_p		= 1<<4,
+	.part		= i2ob,
+	.sizes		= i2ob_sizes,
+	.nr_real	= MAX_I2OB,
+	.fops		= &i2ob_fops,
+};
+
+
+/*
+ * And here should be modules and kernel interface
+ *  (Just smiley confuses emacs :-)
+ */
+
+#ifdef MODULE
+#define i2o_block_init init_module
+#endif
+
+/*
+ *	Driver initialisation (also init_module() when built as a module).
+ *
+ *	Registers the block major, hooks up the per-major tables and the
+ *	default queue, resets the device table, installs the OSM handler,
+ *	starts the event thread, registers the disks and finally probes
+ *	the controllers.
+ *
+ *	Returns 0 on success or a negative errno.  On failure everything
+ *	that was registered up to that point is undone again.
+ */
+int i2o_block_init(void)
+{
+	int i;
+	int err;
+
+	printk(KERN_INFO "I2O Block Storage OSM v0.9\n");
+	printk(KERN_INFO "   (c) Copyright 1999-2001 Red Hat Software.\n");
+
+	/*
+	 *	Register the block device interfaces
+	 */
+
+	if (register_blkdev(MAJOR_NR, "i2o_block", &i2ob_fops)) {
+		printk(KERN_ERR "Unable to get major number %d for i2o_block\n",
+		       MAJOR_NR);
+		return -EIO;
+	}
+#ifdef MODULE
+	printk(KERN_INFO "i2o_block: registered device at major %d\n", MAJOR_NR);
+#endif
+
+	/*
+	 *	Now fill in the boiler plate
+	 */
+
+	blksize_size[MAJOR_NR] = i2ob_blksizes;
+	hardsect_size[MAJOR_NR] = i2ob_hardsizes;
+	blk_size[MAJOR_NR] = i2ob_sizes;
+	max_sectors[MAJOR_NR] = i2ob_max_sectors;
+	blk_dev[MAJOR_NR].queue = i2ob_get_queue;
+
+	blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request);
+	blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
+
+	for (i = 0; i < MAX_I2OB << 4; i++) {
+		i2ob_dev[i].refcnt = 0;
+		i2ob_dev[i].flags = 0;
+		i2ob_dev[i].controller = NULL;
+		i2ob_dev[i].i2odev = NULL;
+		i2ob_dev[i].tid = 0;
+		i2ob_dev[i].head = NULL;
+		i2ob_dev[i].tail = NULL;
+		i2ob_dev[i].depth = MAX_I2OB_DEPTH;
+		i2ob_blksizes[i] = 1024;
+		i2ob_max_sectors[i] = 2;
+	}
+
+	/*
+	 *	Set up the queue
+	 */
+	for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
+	{
+		i2ob_queues[i] = NULL;
+	}
+
+	/*
+	 *	Timers
+	 */
+
+	init_timer(&i2ob_timer);
+	i2ob_timer.function = i2ob_timer_handler;
+	i2ob_timer.data = 0;
+
+	/*
+	 *	Register the OSM handler as we will need this to probe for
+	 *	drives, geometry and other goodies.
+	 */
+
+	if(i2o_install_handler(&i2o_block_handler)<0)
+	{
+		printk(KERN_ERR "i2o_block: unable to register OSM.\n");
+		err = -EINVAL;
+		goto out_unregister;
+	}
+	i2ob_context = i2o_block_handler.context;
+
+	/*
+	 * Initialize event handling thread
+	 */
+	init_MUTEX_LOCKED(&i2ob_evt_sem);
+	evt_pid = kernel_thread(i2ob_evt, NULL, CLONE_SIGHAND);
+	if(evt_pid < 0)
+	{
+		printk(KERN_ERR
+			"i2o_block: Could not initialize event thread. Aborting\n");
+		/*
+		 * Report the failure instead of returning success with
+		 * a half torn down driver.
+		 */
+		err = evt_pid;
+		i2o_remove_handler(&i2o_block_handler);
+		goto out_unregister;
+	}
+
+	/*
+	 *	Finally see what is actually plugged in to our controllers
+	 */
+	for (i = 0; i < MAX_I2OB; i++)
+		register_disk(&i2ob_gendisk, MKDEV(MAJOR_NR,i<<4), 1<<4,
+			&i2ob_fops, 0);
+	i2ob_probe();
+
+	/*
+	 *	Adding i2ob_gendisk into the gendisk list.
+	 */
+	add_gendisk(&i2ob_gendisk);
+
+	return 0;
+
+out_unregister:
+	/*
+	 *	Undo the registration above and stop the block layer
+	 *	from pointing at our tables and queue lookup hook.
+	 */
+	unregister_blkdev(MAJOR_NR, "i2o_block");
+	blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
+	blk_dev[MAJOR_NR].queue = NULL;
+	blksize_size[MAJOR_NR] = NULL;
+	hardsect_size[MAJOR_NR] = NULL;
+	blk_size[MAJOR_NR] = NULL;
+	max_sectors[MAJOR_NR] = NULL;
+	return err;
+}
+
+#ifdef MODULE
+
+EXPORT_NO_SYMBOLS;
+MODULE_AUTHOR("Red Hat Software");
+MODULE_DESCRIPTION("I2O Block Device OSM");
+MODULE_LICENSE("GPL");
+
+
+/*
+ *	Module unload: stop the event thread, stop notifications and
+ *	events for every bound device, remove the OSM handler and give
+ *	back everything i2o_block_init() and i2ob_init_iop() set up.
+ */
+void cleanup_module(void)
+{
+	int i;
+
+	if(evt_running) {
+		printk(KERN_INFO "Killing I2O block threads...");
+		i = kill_proc(evt_pid, SIGTERM, 1);
+		if(!i) {
+			printk("waiting...");
+		}
+		/* Be sure it died */
+		wait_for_completion(&i2ob_thread_dead);
+		printk("done.\n");
+	}
+
+	/*
+	 * Unregister for updates from any devices..otherwise we still
+	 * get them and the core jumps to random memory :O
+	 */
+	if(i2ob_dev_count) {
+		struct i2o_device *d;
+		for(i = 0; i < MAX_I2OB; i++)
+			if((d=i2ob_dev[i<<4].i2odev)) {
+				i2o_device_notify_off(d, &i2o_block_handler);
+				i2o_event_register(d->controller, d->lct_data.tid,
+					i2ob_context, i<<4, 0);
+			}
+	}
+
+	/*
+	 *	We may get further callbacks for ourself. The i2o_core
+	 *	code handles this case reasonably sanely. The problem here
+	 *	is we shouldn't get them .. but a couple of cards feel
+	 *	obliged to tell us stuff we dont care about.
+	 *
+	 *	This isnt ideal at all but will do for now.
+	 */
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(HZ);
+
+	/*
+	 *	Flush the OSM
+	 */
+
+	i2o_remove_handler(&i2o_block_handler);
+
+	/*
+	 *	Return the block device
+	 */
+	if (unregister_blkdev(MAJOR_NR, "i2o_block") != 0)
+		printk("i2o_block: cleanup_module failed\n");
+
+	/*
+	 *	Stop the block layer from pointing at our tables and
+	 *	queue lookup hook once the module is gone.
+	 */
+	blk_dev[MAJOR_NR].queue = NULL;
+	blksize_size[MAJOR_NR] = NULL;
+	hardsect_size[MAJOR_NR] = NULL;
+	blk_size[MAJOR_NR] = NULL;
+	max_sectors[MAJOR_NR] = NULL;
+
+	/*
+	 *	free request queue
+	 */
+	blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
+
+	/*
+	 *	Free the per-IOP queues allocated by i2ob_init_iop();
+	 *	nothing else ever releases them.
+	 */
+	for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
+	{
+		if(i2ob_queues[i] == NULL)
+			continue;
+		blk_cleanup_queue(&i2ob_queues[i]->req_queue);
+		kfree(i2ob_queues[i]);
+		i2ob_queues[i] = NULL;
+	}
+
+	del_gendisk(&i2ob_gendisk);
+}
+#endif
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)