patch-2.4.25 linux-2.4.25/arch/ia64/kernel/salinfo.c
Next file: linux-2.4.25/arch/ia64/kernel/setup.c
Previous file: linux-2.4.25/arch/ia64/kernel/perfmon.c
Back to the patch index
Back to the overall index
- Lines: 647
- Date:
2004-02-18 05:36:30.000000000 -0800
- Orig file:
linux-2.4.24/arch/ia64/kernel/salinfo.c
- Orig date:
2003-11-28 10:26:19.000000000 -0800
diff -urN linux-2.4.24/arch/ia64/kernel/salinfo.c linux-2.4.25/arch/ia64/kernel/salinfo.c
@@ -3,19 +3,29 @@
*
* Creates entries in /proc/sal for various system features.
*
- * Copyright (c) 2001 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) 2003 Silicon Graphics, Inc. All rights reserved.
* Copyright (c) 2003 Hewlett-Packard Co
* Bjorn Helgaas <bjorn.helgaas@hp.com>
*
* 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo
* code to create this file
+ * Oct 23 2003 kaos@sgi.com
+ * Replace IPI with set_cpus_allowed() to read a record from the required cpu.
+ * Redesign salinfo log processing to separate interrupt and user space
+ * contexts.
+ * Cache the record across multi-block reads from user space.
+ * Support > 64 cpus.
+ * Delete module_exit and MOD_INC/DEC_USE_COUNT, salinfo cannot be a module.
*/
#include <linux/types.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <asm/semaphore.h>
#include <asm/sal.h>
#include <asm/uaccess.h>
@@ -57,48 +67,175 @@
(2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */
1]; /* /proc/sal */
-struct salinfo_log_data {
- int type;
- u8 *log_buffer;
- u64 log_size;
-};
+/* Allow build with or without large SSI support */
+#ifdef CPU_MASK_NONE
+#define SCA(x, y) set_cpus_allowed((x), &(y))
+#else
+#define cpumask_t unsigned long
+#define SCA(x, y) set_cpus_allowed((x), (y))
+#endif
-struct salinfo_event {
- int type;
- int cpu; /* next CPU to check */
- volatile unsigned long cpu_mask;
- wait_queue_head_t queue;
+/* Some records we get ourselves, some are accessed as saved data in buffers
+ * that are owned by mca.c.
+ */
+struct salinfo_data_saved {
+ u8* buffer;
+ u64 size;
+ u64 id;
+ int cpu;
};
-static struct salinfo_event *salinfo_event[ARRAY_SIZE(salinfo_log_name)];
+/* State transitions. Actions are :-
+ * Write "read <cpunum>" to the data file.
+ * Write "clear <cpunum>" to the data file.
+ * Write "oemdata <cpunum> <offset>" to the data file.
+ * Read from the data file.
+ * Close the data file.
+ *
+ * Start state is NO_DATA.
+ *
+ * NO_DATA
+ * write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "oemdata <cpunum> <offset>" -> return -EINVAL.
+ * read data -> return EOF.
+ * close -> unchanged. Free record areas.
+ *
+ * LOG_RECORD
+ * write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA.
+ * read data -> return the INIT/MCA/CMC/CPE record.
+ * close -> unchanged. Keep record areas.
+ *
+ * OEMDATA
+ * write "read <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
+ * write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA.
+ * read data -> return the formatted oemdata.
+ * close -> unchanged. Keep record areas.
+ *
+ * Closing the data file does not change the state. This allows shell scripts
+ * to manipulate salinfo data, each shell redirection opens the file, does one
+ * action then closes it again. The record areas are only freed at close when
+ * the state is NO_DATA.
+ */
+enum salinfo_state {
+ STATE_NO_DATA,
+ STATE_LOG_RECORD,
+ STATE_OEMDATA,
+};
struct salinfo_data {
- int open; /* single-open to prevent races */
- int type;
- int cpu; /* "current" cpu for reads */
+ volatile cpumask_t cpu_event; /* which cpus have outstanding events */
+ struct semaphore sem; /* count of cpus with outstanding events (bits set in cpu_event) */
+ u8 *log_buffer;
+ u64 log_size;
+ u8 *oemdata; /* decoded oem data */
+ u64 oemdata_size;
+ int open; /* single-open to prevent races */
+ u8 type;
+ u8 saved_num; /* using a saved record? */
+ enum salinfo_state state :8; /* processing state */
+ u8 padding;
+ int cpu_check; /* next CPU to check */
+ struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */
};
static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
-static spinlock_t data_lock;
+static spinlock_t data_lock, data_saved_lock;
+/** salinfo_platform_oemdata - optional callback to decode oemdata from an error
+ * record.
+ * @sect_header: pointer to the start of the section to decode.
+ * @oemdata: returns vmalloc area containing the decoded output.
+ * @oemdata_size: returns length of decoded output (strlen).
+ *
+ * Description: If user space asks for oem data to be decoded by the kernel
+ * and/or prom and the platform has set salinfo_platform_oemdata to the address
+ * of a platform specific routine then call that routine. salinfo_platform_oemdata
+ * vmalloc's and formats its output area, returning the address of the text
+ * and its strlen. Returns 0 for success, -ve for error. The callback is
+ * invoked on the cpu that generated the error record.
+ */
+int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size);
+
+struct salinfo_platform_oemdata_parms {
+ const u8 *efi_guid;
+ u8 **oemdata;
+ u64 *oemdata_size;
+ int ret;
+};
+
+static void
+salinfo_platform_oemdata_cpu(void *context)
+{
+ struct salinfo_platform_oemdata_parms *parms = context;
+ parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size);
+}
+
+static void
+shift1_data_saved (struct salinfo_data *data, int shift)
+{
+ memcpy(data->data_saved+shift, data->data_saved+shift+1,
+ (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0]));
+ memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0,
+ sizeof(data->data_saved[0]));
+}
+
+/* This routine is invoked in interrupt context. Note: mca.c enables
+ * interrupts before calling this code for CMC/CPE. MCA and INIT events are
+ * not irq safe, do not call any routines that use spinlocks, they may deadlock.
+ *
+ * The buffer passed from mca.c points to the output from ia64_log_get. This is
+ * a persistent buffer but its contents can change between the interrupt and
+ * when user space processes the record. Save the record id to identify
+ * changes.
+ */
void
-salinfo_log_wakeup(int type)
+salinfo_log_wakeup(int type, u8 *buffer, u64 size)
{
- if (type < ARRAY_SIZE(salinfo_log_name)) {
- struct salinfo_event *event = salinfo_event[type];
+ struct salinfo_data *data = salinfo_data + type;
+ struct salinfo_data_saved *data_saved;
+ unsigned long flags = 0;
+ int i, irqsafe = type != SAL_INFO_TYPE_MCA && type != SAL_INFO_TYPE_INIT;
+ int saved_size = ARRAY_SIZE(data->data_saved);
+
+ BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
+
+ if (irqsafe)
+ spin_lock_irqsave(&data_saved_lock, flags);
+ for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+ if (!data_saved->buffer)
+ break;
+ }
+ if (i == saved_size) {
+ if (!data->saved_num) {
+ shift1_data_saved(data, 0);
+ data_saved = data->data_saved + saved_size - 1;
+ } else
+ data_saved = NULL;
+ }
+ if (data_saved) {
+ data_saved->cpu = smp_processor_id();
+ data_saved->id = ((sal_log_record_header_t *)buffer)->id;
+ data_saved->size = size;
+ data_saved->buffer = buffer;
+ }
+ if (irqsafe)
+ spin_unlock_irqrestore(&data_saved_lock, flags);
- if (event) {
- set_bit(smp_processor_id(), &event->cpu_mask);
- wake_up_interruptible(&event->queue);
- }
+ if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) {
+ if (irqsafe)
+ up(&data->sem);
}
}
static int
salinfo_event_open(struct inode *inode, struct file *file)
{
- if (!suser())
+ if (!capable(CAP_SYS_ADMIN))
return -EPERM;
return 0;
}
@@ -107,24 +244,23 @@
salinfo_event_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
{
struct inode *inode = file->f_dentry->d_inode;
- struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
- struct salinfo_event *event = entry->data;
+ struct proc_dir_entry *entry = PDE(inode);
+ struct salinfo_data *data = entry->data;
char cmd[32];
size_t size;
int i, n, cpu = -1;
retry:
- if (!event->cpu_mask) {
+ if (down_trylock(&data->sem)) {
if (file->f_flags & O_NONBLOCK)
return -EAGAIN;
- interruptible_sleep_on(&event->queue);
- if (signal_pending(current))
- return -EINTR;
+ if (down_interruptible(&data->sem))
+ return -ERESTARTSYS;
}
- n = event->cpu;
+ n = data->cpu_check;
for (i = 0; i < NR_CPUS; i++) {
- if (event->cpu_mask & 1UL << n) {
+ if (test_bit(n, &data->cpu_event)) {
cpu = n;
break;
}
@@ -135,10 +271,13 @@
if (cpu == -1)
goto retry;
+ /* events are sticky until the user says "clear" */
+ up(&data->sem);
+
/* for next read, start checking at next CPU */
- event->cpu = cpu;
- if (++event->cpu == NR_CPUS)
- event->cpu = 0;
+ data->cpu_check = cpu;
+ if (++data->cpu_check == NR_CPUS)
+ data->cpu_check = 0;
snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
@@ -159,10 +298,10 @@
static int
salinfo_log_open(struct inode *inode, struct file *file)
{
- struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+ struct proc_dir_entry *entry = PDE(inode);
struct salinfo_data *data = entry->data;
- if (!suser())
+ if (!capable(CAP_SYS_ADMIN))
return -EPERM;
spin_lock(&data_lock);
@@ -173,15 +312,27 @@
data->open = 1;
spin_unlock(&data_lock);
+ if (data->state == STATE_NO_DATA &&
+ !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) {
+ data->open = 0;
+ return -ENOMEM;
+ }
+
return 0;
}
static int
salinfo_log_release(struct inode *inode, struct file *file)
{
- struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+ struct proc_dir_entry *entry = PDE(inode);
struct salinfo_data *data = entry->data;
+ if (data->state == STATE_NO_DATA) {
+ vfree(data->log_buffer);
+ vfree(data->oemdata);
+ data->log_buffer = NULL;
+ data->oemdata = NULL;
+ }
spin_lock(&data_lock);
data->open = 0;
spin_unlock(&data_lock);
@@ -191,95 +342,136 @@
static void
call_on_cpu(int cpu, void (*fn)(void *), void *arg)
{
- if (cpu == smp_processor_id())
- (*fn)(arg);
-#ifdef CONFIG_SMP
- else if (cpu_online(cpu)) /* cpu may not have been validated */
- smp_call_function_single(cpu, fn, arg, 0, 1);
-#endif
+ cpumask_t save_cpus_allowed, new_cpus_allowed;
+ memcpy(&save_cpus_allowed, &current->cpus_allowed, sizeof(save_cpus_allowed));
+ memset(&new_cpus_allowed, 0, sizeof(new_cpus_allowed));
+ set_bit(cpu, &new_cpus_allowed);
+ SCA(current, new_cpus_allowed);
+ (*fn)(arg);
+ SCA(current, save_cpus_allowed);
}
static void
salinfo_log_read_cpu(void *context)
{
- struct salinfo_log_data *info = context;
- struct salinfo_event *event = salinfo_event[info->type];
- u64 size;
-
- size = ia64_sal_get_state_info_size(info->type);
- info->log_buffer = kmalloc(size, GFP_ATOMIC);
- if (!info->log_buffer)
- return;
-
- clear_bit(smp_processor_id(), &event->cpu_mask);
- info->log_size = ia64_sal_get_state_info(info->type, (u64 *) info->log_buffer);
- if (info->log_size)
- salinfo_log_wakeup(info->type);
+ struct salinfo_data *data = context;
+ data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer);
+ if (data->type == SAL_INFO_TYPE_CPE || data->type == SAL_INFO_TYPE_CMC)
+ ia64_sal_clear_state_info(data->type);
+}
+
+static void
+salinfo_log_new_read(int cpu, struct salinfo_data *data)
+{
+ struct salinfo_data_saved *data_saved;
+ unsigned long flags;
+ int i;
+ int saved_size = ARRAY_SIZE(data->data_saved);
+
+ data->saved_num = 0;
+ spin_lock_irqsave(&data_saved_lock, flags);
+retry:
+ for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
+ if (data_saved->buffer && data_saved->cpu == cpu) {
+ sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer);
+ data->log_size = data_saved->size;
+ memcpy(data->log_buffer, rh, data->log_size);
+ barrier(); /* id check must not be moved */
+ if (rh->id == data_saved->id) {
+ data->saved_num = i+1;
+ break;
+ }
+ /* saved record changed by mca.c since interrupt, discard it */
+ shift1_data_saved(data, i);
+ goto retry;
+ }
+ }
+ spin_unlock_irqrestore(&data_saved_lock, flags);
+
+ if (!data->saved_num)
+ call_on_cpu(cpu, salinfo_log_read_cpu, data);
+ data->state = data->log_size ? STATE_LOG_RECORD : STATE_NO_DATA;
}
static ssize_t
salinfo_log_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
{
struct inode *inode = file->f_dentry->d_inode;
- struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+ struct proc_dir_entry *entry = PDE(inode);
struct salinfo_data *data = entry->data;
- struct salinfo_log_data info;
- int ret;
void *saldata;
size_t size;
+ u8 *buf;
+ u64 bufsize;
- info.type = data->type;
- info.log_buffer = 0;
- call_on_cpu(data->cpu, salinfo_log_read_cpu, &info);
- if (!info.log_buffer || *ppos >= info.log_size) {
- ret = 0;
- goto out;
+ if (data->state == STATE_LOG_RECORD) {
+ buf = data->log_buffer;
+ bufsize = data->log_size;
+ } else if (data->state == STATE_OEMDATA) {
+ buf = data->oemdata;
+ bufsize = data->oemdata_size;
+ } else {
+ buf = NULL;
+ bufsize = 0;
}
+ if (*ppos >= bufsize)
+ return 0;
- saldata = info.log_buffer + file->f_pos;
- size = info.log_size - file->f_pos;
+ saldata = buf + file->f_pos;
+ size = bufsize - file->f_pos;
if (size > count)
size = count;
- if (copy_to_user(buffer, saldata, size)) {
- ret = -EFAULT;
- goto out;
- }
+ if (copy_to_user(buffer, saldata, size))
+ return -EFAULT;
*ppos += size;
- ret = size;
-
-out:
- kfree(info.log_buffer);
- return ret;
+ return size;
}
static void
salinfo_log_clear_cpu(void *context)
{
struct salinfo_data *data = context;
- struct salinfo_event *event = salinfo_event[data->type];
- struct salinfo_log_data info;
-
- clear_bit(smp_processor_id(), &event->cpu_mask);
ia64_sal_clear_state_info(data->type);
+}
- /* clearing one record may make another visible */
- info.type = data->type;
- salinfo_log_read_cpu(&info);
- if (info.log_buffer && info.log_size)
- salinfo_log_wakeup(data->type);
-
- kfree(info.log_buffer);
+static int
+salinfo_log_clear(struct salinfo_data *data, int cpu)
+{
+ data->state = STATE_NO_DATA;
+ if (!test_bit(cpu, &data->cpu_event))
+ return 0;
+ down(&data->sem);
+ clear_bit(cpu, &data->cpu_event);
+ if (data->saved_num) {
+ unsigned long flags;
+ spin_lock_irqsave(&data_saved_lock, flags);
+ shift1_data_saved(data, data->saved_num - 1 );
+ data->saved_num = 0;
+ spin_unlock_irqrestore(&data_saved_lock, flags);
+ }
+ /* ia64_mca_log_sal_error_record or salinfo_log_read_cpu already cleared
+ * CPE and CMC errors
+ */
+ if (data->type != SAL_INFO_TYPE_CPE && data->type != SAL_INFO_TYPE_CMC)
+ call_on_cpu(cpu, salinfo_log_clear_cpu, data);
+ /* clearing a record may make a new record visible */
+ salinfo_log_new_read(cpu, data);
+ if (data->state == STATE_LOG_RECORD &&
+ !test_and_set_bit(cpu, &data->cpu_event))
+ up(&data->sem);
+ return 0;
}
static ssize_t
salinfo_log_write(struct file *file, const char *buffer, size_t count, loff_t *ppos)
{
struct inode *inode = file->f_dentry->d_inode;
- struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+ struct proc_dir_entry *entry = PDE(inode);
struct salinfo_data *data = entry->data;
char cmd[32];
size_t size;
+ u32 offset;
int cpu;
size = sizeof(cmd);
@@ -288,10 +480,31 @@
if (copy_from_user(cmd, buffer, size))
return -EFAULT;
- if (sscanf(cmd, "read %d", &cpu) == 1)
- data->cpu = cpu;
- else if (sscanf(cmd, "clear %d", &cpu) == 1)
- call_on_cpu(cpu, salinfo_log_clear_cpu, data);
+ if (sscanf(cmd, "read %d", &cpu) == 1) {
+ salinfo_log_new_read(cpu, data);
+ } else if (sscanf(cmd, "clear %d", &cpu) == 1) {
+ int ret;
+ if ((ret = salinfo_log_clear(data, cpu)))
+ count = ret;
+ } else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) {
+ if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA)
+ return -EINVAL;
+ if (offset > data->log_size - sizeof(efi_guid_t))
+ return -EINVAL;
+ data->state = STATE_OEMDATA;
+ if (salinfo_platform_oemdata) {
+ struct salinfo_platform_oemdata_parms parms = {
+ .efi_guid = data->log_buffer + offset,
+ .oemdata = &data->oemdata,
+ .oemdata_size = &data->oemdata_size
+ };
+ call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms);
+ if (parms.ret)
+ count = parms.ret;
+ } else
+ data->oemdata_size = 0;
+ } else
+ return -EINVAL;
return count;
}
@@ -309,9 +522,8 @@
struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
struct proc_dir_entry *dir, *entry;
- struct salinfo_event *event;
struct salinfo_data *data;
- int i, j;
+ int i, j, online;
salinfo_dir = proc_mkdir("sal", NULL);
if (!salinfo_dir)
@@ -324,6 +536,9 @@
}
for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
+ data = salinfo_data + i;
+ data->type = i;
+ sema_init(&data->sem, 0);
dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
if (!dir)
continue;
@@ -331,32 +546,26 @@
entry = create_proc_entry("event", S_IRUSR, dir);
if (!entry)
continue;
-
- event = kmalloc(sizeof(*event), GFP_KERNEL);
- if (!event)
- continue;
- memset(event, 0, sizeof(*event));
- event->type = i;
- init_waitqueue_head(&event->queue);
- salinfo_event[i] = event;
- /* we missed any events before now */
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- set_bit(j, &event->cpu_mask);
- entry->data = event;
+ entry->data = data;
entry->proc_fops = &salinfo_event_fops;
*sdir++ = entry;
entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
if (!entry)
continue;
-
- data = &salinfo_data[i];
- data->type = i;
entry->data = data;
entry->proc_fops = &salinfo_data_fops;
*sdir++ = entry;
+ /* we missed any events before now */
+ online = 0;
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j)) {
+ set_bit(j, &data->cpu_event);
+ ++online;
+ }
+ sema_init(&data->sem, online);
+
*sdir++ = dir;
}
@@ -365,17 +574,6 @@
return 0;
}
-static void __exit
-salinfo_exit(void)
-{
- int i = 0;
-
- for (i = 0; i < ARRAY_SIZE(salinfo_proc_entries); i++) {
- if (salinfo_proc_entries[i])
- remove_proc_entry (salinfo_proc_entries[i]->name, NULL);
- }
-}
-
/*
* 'data' contains an integer that corresponds to the feature we're
* testing
@@ -385,8 +583,6 @@
{
int len = 0;
- MOD_INC_USE_COUNT;
-
len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n");
if (len <= off+count) *eof = 1;
@@ -397,10 +593,7 @@
if (len>count) len = count;
if (len<0) len = 0;
- MOD_DEC_USE_COUNT;
-
return len;
}
module_init(salinfo_init);
-module_exit(salinfo_exit);
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)