/*
 *  vtune.c
 *
 *  Copyright (C) 2003 Intel Corporation
 *  Author/Maintainer - George W Artz <george.w.artz@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
/*
 *  Author(s): George Artz <george.w.artz@intel.com>
 *             Andi Kleen <ak@suse.de>, 08/25/2002, corrected return values
 *             Juan Villacis <juan.villacis@intel.com>, 09/01/2002, added
 *                           conditional compile for enforcing root access
 *                           (based on Andi's suggested capabilities fixes)
 *                           02/21/2003, added scan support for sys_call_table
 *
 *  File: vtune.c
 *
 *  System: VTune Linux Sampling Driver
 *
 * ============================================================================
 */
#include <linux/vmalloc.h>
#include <linux/slab.h>     /* malloc */
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <asm/uaccess.h>
#ifdef ENFORCE_ROOT_ONLY_ACCESS
#include <linux/capability.h>
#endif
#ifdef linux32
#include <asm/unistd.h>
#endif

#include "vtdef.h"
#include "include/vtuneshared.h"
#include "vtoshooks.h"
#include "vtproto.h"
#include "vtglobal.h"
#include "emwmt.h"

long enum_user_mode_modules(void);

#ifdef linux64
void set_PP_SW(void);
#endif

// The following is added by Fleming for the algorithm of eliminating dup
// entries for a single process when calling do_fork and do_execve in
// consecutive order.

typedef struct tag_MR_pointer {
    struct tag_MR_pointer *m_p_prev;
    struct tag_MR_pointer *m_p_next;
    module_record *m_p_mrp;
} MRPOINTER, *PMRPOINTER;
// g_MR_pointer_head used to be "last", g_MR_pointer_tail used to be
// "gMRpointer"
PMRPOINTER g_MR_pointer_tail = NULL, g_MR_pointer_head = NULL;

typedef struct tag_pid_record_pointer {
    struct tag_pid_record_pointer *m_p_next;
    pid_record *m_p_pid_record;
} pid_record_pointer, *P_pid_record_pointer;
P_pid_record_pointer g_pid_record_pointer_tail = NULL, g_pid_record_pointer_head = NULL;

ssize_t samp_write_pid_record_file(struct file *fp, char *ubuf, size_t cnt,
                   loff_t * pp);
void add_PID_create_record_to_list(pid_t pid);
void free_PID_create_record_list(void);

// For eliminating the wake_up_interruptible( ) in the ebs isr
// Added by Fleming
__u32 g_b_wake_up_interruptible = 0;
__u32 b_stop_sampling = FALSE;
__u32 restart_EBS = FALSE;
struct irqaction *pebs_irqaction = NULL;

spinlock_t vtune_modlist_lock = SPIN_LOCK_UNLOCKED;
spinlock_t pidlist_lock = SPIN_LOCK_UNLOCKED;
spinlock_t mgid_lock = SPIN_LOCK_UNLOCKED;

__u32 total_loads;      // debug code 
__u32 total_loads_init;     // debug code 
__u32 total_pid_updates;    // debug code

// module_record buffer
#define MR_BUFFER_DATA_SIZE (32*1024)
typedef struct _MR_BUF {    // 
    LIST_ENTRY link;    //
    __u32 buf_size;     // byte length of buffer
    __u32 bytes_in_buf; // count of bytes in buffer
    char buf[MR_BUFFER_DATA_SIZE];  
} MR_BUF, *PMR_BUF;

PMR_BUF p_current_mr_buf;       // current module record buffer    
LIST_ENTRY mr_buf_free_list_head;   // head of free mr  bufs           
LIST_ENTRY mr_buf_out_list_head;    // head of mr  bufs to be output   

PMR_BUF p_mr_buf_out;       // pointer to module record buffer that has
                // been popped off
                // ..output list and contains modules record
                // ready to be written to .tb5 file 
__u32 mr_buf_out_offset;    // offset to next module record in the output
                // buffer  

// Process create Record buffer        
#define PID_BUFFER_DATA_SIZE (16*1024)
typedef struct _PID_BUF {   // 
    LIST_ENTRY link;    //
    __u32 buf_size;     // byte length of buffer
    __u32 bytes_in_buf; // count of bytes in buffer
    char buf[PID_BUFFER_DATA_SIZE]; 
} PID_BUF, *PPID_BUF;

PPID_BUF p_current_pid_buf;     // current pid record buffer
LIST_ENTRY pid_buf_free_list_head;  // head of free pid  bufs
LIST_ENTRY pid_buf_out_list_head;   // head of pr  bufs to be output

PPID_BUF p_pid_buf_out;         // pointer to process create record
                    // buffer that has been popped off
// ..output list and contains pid
// record ready to be written to .tb5 file 
__u32 pid_buf_out_offset;       // offset to next pid record in the
                    // output buffer  

__u32 track_module_loads = FALSE;   // track module loads
__u32 track_process_creates = FALSE;    // track process creates
static atomic_t hook_in_progress = ATOMIC_INIT(0);
static atomic_t samp_isr_active = ATOMIC_INIT(0);

__u32 mgid;             // module group ID

void install_OS_hooks(void);
void un_install_OS_hooks(void);
int vdrv_resume_EBS(void);

#ifdef linux32
__u32 validate_EBS_regs(void);
void SAMP_Set_Apic_Virtual_Wire_Mode(void);
#endif

void add_to_mr_buf_out_offset(__u32 rec_length);

module_record *get_next_mr_rec_from_out_list(void);

PMR_BUF pop_mr_buf_from_head_of_free_list(void);

PMR_BUF pop_mr_buf_from_head_of_out_list(void);

PMR_BUF alloc_new_mr_buf(void);

void add_mr_buf_to_free_list(PMR_BUF pmr_buf);

void free_mr_bufs(void);

void move_current_mr_buf_to_out_list(void);

void add_to_pid_buf_out_offset(__u32 rec_length);

pid_record *get_next_pid_rec_from_out_list(void);

PPID_BUF alloc_new_pid_buf(void);

void add_pid_buf_to_free_list(PPID_BUF p_pid_buf);

void free_pid_bufs(void);

void move_current_pid_buf_to_out_list(void);

#ifdef linux32
void
get_cpu_info_for_current_cpu(void *p)
{
    __u32 i, j, eax, ebx, ecx, edx;
    unsigned long ul_logical_processors_per_package, ul_logical_processors_shift;
    cpu_information *p_cpu_infor0;
    cpuid_output *pout;
    cpu_map *pmap;

    p_cpu_infor0 = (cpu_information *) p;

    //
    // Skip cpu if it's out of requested range 
    //
    if (smp_processor_id() >= p_cpu_infor0->ul_num_cpus_available) {
        return;
    }

    pmap =
        (cpu_map *) ((__u32) p_cpu_infor0 + p_cpu_infor0->ul_offset_to_cpu_map_array +
            (smp_processor_id() * sizeof (cpu_map)));
    pout =
        (cpuid_output *) ((__u32) p_cpu_infor0 +
                 p_cpu_infor0->ul_offset_to_cpuid_output_array +
                 (smp_processor_id() *
                  (sizeof (cpuid_output) *
                   (p_cpu_infor0->ul_num_EAX_inputs +
                p_cpu_infor0->ul_num_EAX_extended_inputs))));
    samp_dbg_print(KERN_INFO
             "VTUNE: get_cpu_infoForCurrentCpu() p_cpu_infor0 0x%x  pmap 0x%x pout 0x%x \n",
             p_cpu_infor0, pmap, pout);

    //
    // Fill in cpu_map for current cpu
    //
    cpuid(1, &eax, &ebx, &ecx, &edx);
    pmap->ul_cpu_num = smp_processor_id();
    if (edx & CPUID_HT_MASK) {
        ul_logical_processors_per_package = (ebx & 0xff0000) >> 16;
        if (ul_logical_processors_per_package == 0) {
            ul_logical_processors_per_package = 1;
        }
        ul_logical_processors_shift = 0;
        ul_logical_processors_per_package--;
        while (1) {
            ul_logical_processors_shift++;
            ul_logical_processors_per_package >>= 1;
            if (!ul_logical_processors_per_package) {
                break;
            }
        }
        pmap->ul_hardware_thread_num =
            (ebx >> 24) & ul_logical_processors_shift;
        pmap->ul_package_num = ((ebx >> 24) >> ul_logical_processors_shift);
        samp_dbg_print(KERN_INFO
                 "VTUNE: cpu %d HT enabled HW threads per package %d shift %d \n",
                 smp_processor_id(), (ebx & 0xff0000) >> 16,
                 ul_logical_processors_shift);
    } else {
        //
        // If one HW thread per core (HT disabled), then
        // set package# = OS cpu number, and HT thread =0.
        pmap->ul_hardware_thread_num = 0;
        pmap->ul_package_num = smp_processor_id();
        samp_dbg_print(KERN_INFO "VTUNE: cpu %d HT disabled \n",
                 smp_processor_id());
    }

    //
    // get cpuid data for standard inputs for current cpu
    //
    for (i = 1; i <= p_cpu_infor0->ul_num_EAX_inputs; i++, pout++) {
        pout->ul_cpu_num = smp_processor_id();
        pout->ul_EAX_input = i;
        cpuid(i, &pout->ul_EAX_output, &pout->ul_EBX_output,
              &pout->ul_ECX_output, &pout->ul_EDX_output);
    }

    //                                                 
    // get cpuid data for extended inputs for current cpu
    //
    samp_dbg_print(KERN_INFO
             "VTUNE: ext outputs pCOUinfoR0 0x%x pout 0x%x std inputs %d  ext inputs %d pout->ul_EDX_output 0x%x \n",
             p_cpu_infor0, pout, p_cpu_infor0->ul_num_EAX_inputs,
             p_cpu_infor0->ul_num_EAX_extended_inputs, pout->ul_EDX_output);
    for (i = 0x80000001, j = 0; j < p_cpu_infor0->ul_num_EAX_extended_inputs;
         i++, j++, pout++) {
        pout->ul_cpu_num = smp_processor_id();
        pout->ul_EAX_input = i;
        cpuid(i, &pout->ul_EAX_output, &pout->ul_EBX_output,
              &pout->ul_ECX_output, &pout->ul_EDX_output);
    }

    return;
}

int
get_cpu_info(cpu_information * p_cpu_info)
{
    __u32 buf_size, num_cpus;
    cpu_information cpu_info;
    cpu_information *p_cpu_infor0;
    __u32 map_offset, map_size, out_offset, out_size;

    //
    // Validate input
    //

    if (!p_cpu_info) {
        return -EFAULT;
    }

    if (!access_ok(VERIFY_WRITE, p_cpu_info, sizeof(cpu_information))) {
        return -EFAULT;
    }

    copy_from_user(&cpu_info, p_cpu_info, sizeof(cpu_information));
    
    num_cpus = cpu_info.ul_num_cpus_available;

    if (num_cpus > MAX_PROCESSORS) {
        return -EINVAL;
    }

    map_offset = cpu_info.ul_offset_to_cpu_map_array;
    out_offset = cpu_info.ul_offset_to_cpuid_output_array;

    if (!map_offset || (map_offset < sizeof(cpu_information))) {
        return -EINVAL;
    }

    if (!out_offset || (out_offset < sizeof(cpu_information))) {
        return -EINVAL;
    }

    map_size = num_cpus * sizeof(cpu_map);
    out_size = num_cpus * (sizeof(cpuid_output) * (cpu_info.ul_num_EAX_inputs + cpu_info.ul_num_EAX_extended_inputs));

    if (map_offset > out_offset) {
        if ((out_offset + out_size) > map_offset) {
            return -EINVAL;
        }
        buf_size = map_offset + map_size;
    }
    else {
        if (out_offset > map_offset) {
            if ((map_offset + map_size) > out_offset) {
                return -EINVAL;
            }
            buf_size = out_offset + out_size;
        }
        else {
            return -EINVAL;
        }
    }

    samp_dbg_print(KERN_INFO
             "VTUNE: get_cpu_info() vmalloc buf_size = %d NumCpus %d mapOffset 0x%x outOffset 0x%x \n",
             buf_size, p_cpu_info->ul_num_cpus_available,
             p_cpu_info->ul_offset_to_cpu_map_array,
             p_cpu_info->ul_offset_to_cpuid_output_array);


    if (!access_ok(VERIFY_WRITE, p_cpu_info, buf_size)) {
        return -EFAULT;
    }

    //
    // Allocate buffer for cpu info
    //

    p_cpu_infor0 = vmalloc(buf_size);

    if (!p_cpu_infor0) {
        return (-ENOMEM);
    }
    copy_from_user(p_cpu_infor0, p_cpu_info, buf_size);

#ifdef CONFIG_SMP
    smp_call_function(get_cpu_info_for_current_cpu, (void *) p_cpu_infor0, 1, 1);
#endif
    get_cpu_info_for_current_cpu((void *) p_cpu_infor0);

    copy_to_user((void *) p_cpu_info, (void *) p_cpu_infor0, buf_size);

    vfree(p_cpu_infor0);

    return (0);
}
#endif              // linux32

/*
 *
 *  Function: samp_read_cpu_perf_counters
 *
 *  Description: 
 *  Read and Construct Event Totals
 *
 *  Parms:
 *      Entry:      prBuf
 *  
 *      Return:     status
 *
 */
int samp_read_cpu_perf_counters(RDPMC_BUF *pr_buf)
{
    RDPMC_BUF   *pr_buf_r0;


    pr_buf_r0 = vmalloc(sizeof(RDPMC_BUF));

    if (!pr_buf_r0) {
        return(-ENOMEM);
    }

    if (copy_from_user(pr_buf_r0, pr_buf, sizeof(RDPMC_BUF))) {
        samp_dbg_print(KERN_INFO "VTUNE: samp_read_cpu_perf_counters() copy_from_user failed \n");
        return(-EINVAL);
    }

    //
    // The caller should set cpuMaskIn and pmcMask.
    // Set defaults if the fields were not set by caller.
    //
    if (pr_buf_r0->cpu_mask_in == 0) {
        pr_buf_r0->cpu_mask_in = -1;
    }
    if (pr_buf_r0->pmc_mask.quad_part == 0) {
        pr_buf_r0->pmc_mask.low_part = -1;
    }
    pr_buf_r0->cpu_mask_out = 0;
    pr_buf_r0->duration = pdsa->duration;

#ifdef SMP_ON
    smp_call_function(read_cpu_perf_counters_for_current_cpu, (void *) pr_buf_r0, 1, 1);  
#endif
    read_cpu_perf_counters_for_current_cpu((void *) pr_buf_r0);  

    memcpy((void *) pr_buf, (void*) pr_buf_r0, sizeof(RDPMC_BUF));

    vfree(pr_buf_r0);

    return(0);
}

#ifdef linux64

/*
 *
 *
 *   Function:  InstallperfISR
 *
 *   description:
 *
 *   Parms:
 *       entry:  None
 *       
 *       return: None
 *
 *
 */
int
install_perf_isr(void)
{
    int status = -EPERM;

    samp_dbg_print(KERN_DEBUG "VTUNE: install_perf_isr entered... pmv 0x%p \n",
             itp_get_pmv());

    if (pebs_irqaction) {
        return (status);
    }

#if SA_PERCPU_IRQ_SUPPORTED

    ebs_irq = VTUNE_PERFMON_IRQ;
    pebs_irqaction = (struct irqaction *) 1;
    status =
        request_irq(VTUNE_PERFMON_IRQ, ebs_intr,
            SA_INTERRUPT | SA_PERCPU_IRQ, "VTune Sampling", NULL);

#else

    {
        char name[] = "VTune";

        pebs_irqaction = kmalloc(sizeof (struct irqaction), GFP_ATOMIC);
        if (pebs_irqaction) {
            memset(pebs_irqaction, 0, sizeof (struct irqaction));
            ebs_irq = VTUNE_PERFMON_IRQ;
            pebs_irqaction->handler = ebs_intr;
            pebs_irqaction->flags = SA_INTERRUPT;
            pebs_irqaction->name = name;
            pebs_irqaction->dev_id = NULL;

            register_percpu_irq(ebs_irq, pebs_irqaction);
            status = 0;
        }
    }
#endif              // SA_PERCPU_IRQ_SUPPORTED

    samp_dbg_print(KERN_DEBUG
             "VTUNE: install_perf_isr exit...... rc 0x%x   pmv 0x%p \n",
             status, itp_get_pmv());

    return (status);
}

void
uninstall_perf_isr(void)
{

    samp_dbg_print(KERN_DEBUG
             "VTUNE: uninstall_perf_isr entered... pmv 0x%p \n",
             itp_get_pmv());

    if (xchg(&pebs_irqaction, 0)) {
        free_irq(ebs_irq, NULL);
    }
    samp_dbg_print(KERN_DEBUG
             "VTUNE: uninstall_perf_isr exit...... pmv 0x%p \n",
             itp_get_pmv());
    return;
}

#endif              // linux64

/*
 *
 *
 *   Function:  init_driver_OS
 *
 *   description:
 *   Initialize OS specific portions of driver
 *
 *   Parms:
 *       entry:  None
 *   
 *       return: status
 *
 *
 */
int
init_driver_OS(void)
{
    /* Determine CPU support */
    vdrvgetsysinfo();

    /* Initialize Global Driver Locks */
    spin_lock_init(&sample_exec_lock);
    spin_lock_init(&sample_int_lock);
    spin_lock_init(&reg3f1_write_lock);

    p_current_mr_buf = 0;
    INIT_LIST_HEAD(&mr_buf_free_list_head);
    INIT_LIST_HEAD(&mr_buf_out_list_head);

    p_current_pid_buf = 0;
    INIT_LIST_HEAD(&pid_buf_free_list_head);
    INIT_LIST_HEAD(&pid_buf_out_list_head);

#ifdef linux32
    app_base_low = LINUX32_APP_BASE_LOW;
    app_base_high = LINUX32_APP_BASE_HIGH;
#endif

#ifdef linux64
    app_base_low = LINUX64_APP_BASE_LOW;
    app_base_high = LINUX64_APP_BASE_HIGH;
#endif

    install_OS_hooks();

    return (0);
}

/*
 *
 *
 *   Function:  init_module
 *
 *   description:
 *   initialization of driver resources and system registration.
 *
 *   Parms:
 *       entry:  None
 *   
 *       return: status
 *
 *
 */
int
init_module(void)
{
    int result;

    /* Obtain an available device number and obtain a lock on the device */

    result = register_chrdev(0, "vtune", &vtune_fops);

    if (result < 0) {
        SampPrint(KERN_INFO
              "VTune: * ERROR * Module register Failed!\n");
        return (result);
    }

    /* set major node number (created when first arg to register_chrdev is 0) */
    vtune_major = result;

#ifdef CONFIG_SMP
    /* Send load message to system */
    SampPrint(KERN_INFO
          "VTUNE: VTune Sampling Collector (SMP Enabled) v.%d.%d loading...\n",
          SYS_VERSION_MAJOR, SYS_VERSION_MINOR);
#else
    SampPrint(KERN_INFO
          "VTUNE: VTune Sampling Collector (SMP Disabled) v.%d.%d loading...\n",
          SYS_VERSION_MAJOR, SYS_VERSION_MINOR);
#endif

    return 0;
}

void
begin_hook(void)
{
    atomic_inc(&hook_in_progress);
    return;
}

void
end_hook(void)
{
    atomic_dec(&hook_in_progress);
    return;
}

/*
 *
 *
 *   Function:  stop_sampling
 *
 *   description:
 *       This routine stops sampling.
 *
 *   Parms:
 *       entry:  None
 *
 *       return: status
 *
 *
 */
void
stop_sampling(boolean_t do_wakeup)
{

    samp_dbg_print(KERN_DEBUG
             "VTUNE: stop_sampling() entered  current pid %d \n",
             current->pid);

    /* START - Critical Section */
    spin_lock(&sample_exec_lock);

    samp_info.sampling_active = FALSE;
    track_module_loads = FALSE;
    track_process_creates = FALSE;
    //
    // Wait for any in progress interrupt handlers to complete
    //
    while (atomic_read(&samp_isr_active)) ;

    if (samp_info.flags & SINFO_STARTED) {

        samp_info.flags &= ~(SINFO_STARTED | SINFO_DO_STOP);
        samp_info.flags |= (SINFO_DO_WRITE | SINFO_STOPPING);
        signal_thread_event = FALSE;

        //
        // Save current time as stop time, cancel max sample time timer,
        // and signal the thread to write data and free resources.
        //

        pdsa->duration = (unsigned long) ((jiffies - start_time) * 10);
        samp_info.sample_time = pdsa->duration; // 12-15-97
        pdsa->tot_samples = pdsa->sample_count;

        //
        // Clear current sample count
        // to avoid confusing future
        // sampling sessions.
        //
        samp_info.sample_count = 0;
        pdsa->sample_count = 0; // 04-01-96
        pdsa->pause_count = 0;
        pdsa->running = 0;

        spin_unlock(&sample_exec_lock);

        samp_dbg_print(KERN_INFO
                 "VTUNE: sampling stopped. total_loads %d total_loads_init %d  total_pid_updates %d buf size %d \n",
                 total_loads, total_loads_init, total_pid_updates,
                 MR_BUFFER_DATA_SIZE);

        if (sample_method & METHOD_EBS) {
            vdrv_stop_EBS();
        }

        if (xchg(&g_start_delay_timer_ON, 0)) {
            del_timer(&delay_tmr);
        }
        if (xchg(&g_max_samp_timer_ON, 0)) {
            del_timer(&time_out_tmr);
        }

        if (do_wakeup) {
            wake_up_interruptible(&pc_write);
        }
    } else {
        spin_unlock(&sample_exec_lock);
    }

    return;
}

void
samp_cleanup(void)
{
    void *pbuf;

    samp_info.flags = SINFO_STOP_COMPLETE;

    /* Free sampling buffer back to OS */
    if ((pbuf = xchg(&buf_start, 0))) {
        vfree(pbuf);
    }

    if (pdsa) {
        pdsa->pause_count = 0;
        pdsa->running = 0;
    }
#ifdef linux64
    uninstall_perf_isr();
#endif
    return;
}

/*
 *
 *
 *   Function:  cleanup_module
 *
 *   description:
 *
 *   Parms:
 *       entry:  None
 *   
 *       return: None
 *
 *
 */
void
cleanup_module(void)
{
    stop_sampling(TRUE);
    samp_cleanup();

    driver_unload();
    un_install_OS_hooks();
    unregister_chrdev(vtune_major, "vtune");
    return;
}

/*
 *
 *
 *   Function:  vtune_open
 *
 *   description:
 *
 *   Parms:
 *       entry:  *inode
 *               *filp
 *
 *       return: status
 *
 *
 */
int
vtune_open(struct inode *inode, struct file *filp)
{
  
#ifdef ENFORCE_ROOT_ONLY_ACCESS
    if (!capable(CAP_SYS_PTRACE)) {
        printk(KERN_INFO "VTUNE: module open failed, root access required \n");
        return -EPERM;
    }
#endif
  
    if (MINOR(inode->i_rdev) == 1) {
        if (file_count) {
            return -EBUSY;
        } else {
            file_count++;
        }
    }
    driver_open();
    return (0);
}

/*
 *
 *
 *   Function:  vtune_write
 *
 *   description:
 *
 *   Parms:
 *       entry:  *filp
 *               *buf
 *               count
 *               *ppos
 *   
 *       return: status
 *
 *
 */
ssize_t
vtune_write(struct file * filp, const char *buf, size_t count, loff_t * ppos)
{
    return -ENOSYS;
}



/*
 *
 *
 *   Function:  vtune_ioctl
 *
 *   description:
 *
 *   Parms:
 *       entry:  *inode
 *               *filp
 *               cmd
 *               *arg
 *
 *       return: status
 *
 *
 */
int
vtune_ioctl(struct inode *inode, struct file *filp,
        unsigned int cmd, unsigned long arg)
{
    int status, sp_offset, sp_length;

    samp_parm_ex *spx = NULL;
    samp_parm6   *sp6 = NULL;

    status = sp_length = 0;

#ifdef ENFORCE_ROOT_ONLY_ACCESS
    if (!capable(CAP_SYS_PTRACE))
      return -EPERM;
#endif

    switch (cmd) {
    case VTUNE_CONFIG_EX:
    case VTUNE_START_EX:

        //
        // Verify access to samp parm header
        //
        if (!access_ok(VERIFY_READ, arg, sizeof(samp_parm_header))) {
            status = -EINVAL;
            break;
        }

        spx = (samp_parm_ex *) arg;
        sp_offset = spx->hdr.sp_offset;
        sp_length = spx->hdr.sp_length;

        if (sp_offset < sizeof(samp_parm_header)) {
            status = -EINVAL;
            break;
        }

        //
        // Verify read access to entire samp parm structure
        // 
        if (!access_ok(VERIFY_READ, arg, sp_offset + sp_length)) {
            status = -EINVAL;
            break;
        }

        switch (spx->hdr.sp_version) {
        case 6:
            //
            // Configure or start sampling
            //
            sp6 = (void *) spx + sp_offset;
            if (cmd == VTUNE_CONFIG_EX) {
                status = samp_configure6(sp6, sp_length);
            }
            else {
                status = start_sampling6(sp6, sp_length);
            }
            break;
        default:
            status = -EINVAL;
            break;
        }

        break;
    case VTUNE_READPERF:

        if (!access_ok(VERIFY_WRITE, arg, sizeof(RDPMC_BUF))) {
            status = -EINVAL;
            break;
        }
        status = samp_read_cpu_perf_counters((RDPMC_BUF *) arg);
        break;
    case VTUNE_STOP:

        status = vtune_sampuserstop();
        break;
    case VTUNE_STAT:

        if (!access_ok(VERIFY_WRITE, arg, sizeof(sampinfo_t))) {
            status = -EINVAL;
            break;
        }
        samp_info.sample_rec_length = pdsa->sample_rec_length;
        copy_to_user((void *) arg, (void *) &samp_info, sizeof (sampinfo_t));
        status = 0;
        break;
    case VTUNE_SYSINFO:

        if (!access_ok(VERIFY_WRITE, arg, sizeof(vtune_sys_info))) {
            status = -EINVAL;
            break;
        }
        copy_to_user((void *) arg, (void *) &vtune_sys_info, sizeof (vtune_sys_info));
        status = 0;
        break;
#ifdef linux32
    case VTUNE_GETCPUINFO:

        status = get_cpu_info((cpu_information *) arg);
        break;
#endif
    default:
        status = -EINVAL;
        break;
    }
    return (status);
}

/*
 *
 *
 *   Function:  vtune_read_pcs
 *
 *   description:
 *
 *   Parms:
 *       entry:  *filp
 *               *buf
 *               count
 *               *ppos
 *
 *       return: Size
 *
 *
 */
size_t
samp_write_pc_file(struct file * filp, char *buf, size_t count, loff_t * ppos)
{
    size_t buf_len, bytes_not_copied;
    boolean_t do_copy, do_stop;

    samp_dbg_print(KERN_DEBUG
             "VTUNE: samp_write_pc_file() entered  current pid %d \n",
             current->pid);

    for (;;) {
        if (xchg(&b_stop_sampling, 0)) {
            stop_sampling(FALSE);
        }

        spin_lock(&sample_exec_lock);
        do_copy = do_stop = FALSE;

        buf_len = (size_t) p_sample_buf - (size_t) buf_start;
        if ((samp_info.flags & SINFO_DO_WRITE) && buf_len) {
            do_copy = TRUE;
        }
        if (samp_info.flags & SINFO_STOPPING) {
            do_stop = TRUE;
        }
        samp_info.flags &= ~SINFO_DO_WRITE;
        spin_unlock(&sample_exec_lock);

        bytes_not_copied = 0;
        if (do_copy) {
            if (samp_parms.calibration) {
                do_copy = FALSE;
            } else {
                bytes_not_copied =
                    copy_to_user(buf, buf_start, buf_len);
                samp_dbg_print(KERN_DEBUG
                         "VTUNE: samp_write_pc_file() pid %x copy_to_user  size %d bytes_not_copied %d \n",
                         current->pid, buf_len,
                         bytes_not_copied);
            }
            p_sample_buf = buf_start;
            current_buffer_count = 0;
            memset((char *) buf_start, 0, buf_length);
        }
        if (do_copy) {
            if (bytes_not_copied == 0) {
                if (sample_method & METHOD_EBS) {
                    restart_EBS = TRUE;
                }
                break;
            } else {
                stop_sampling(FALSE);
                do_stop = TRUE;
            }
        }
        if (do_stop) {
            samp_dbg_print(KERN_DEBUG
                     "VTUNE: samp_write_pc_file() pid %x stopping \n",
                     current->pid);
            stop_sampling(FALSE);
            samp_cleanup();
            buf_len = 0;
            break;
        }
        spin_lock(&sample_exec_lock);
        if (samp_info.flags & SINFO_STARTED) {
            if (xchg(&restart_EBS, 0)) {
                samp_info.sampling_active = TRUE;
                spin_unlock(&sample_exec_lock);
                samp_dbg_print(KERN_DEBUG
                         "VTUNE: samp_write_pc_file() pid %x resume EBS \n",
                         current->pid);
                vdrv_resume_EBS();
            } else {
                spin_unlock(&sample_exec_lock);
            }
        } else {
            spin_unlock(&sample_exec_lock);
            buf_len = 0;
            break;
        }
        samp_dbg_print(KERN_DEBUG
                 "VTUNE: samp_write_pc_file() pid %x entering sleep \n",
                 current->pid);
        wait_event_interruptible(pc_write, (!samp_info.sampling_active));
    }
    return (buf_len);
}

/*
 *
 *
 *   Function:  samp_write_module_file
 *
 *   description:
 *
 *   Parms:
 *       entry:  *fp
 *               *ubuf
 *               cnt
 *               *pp
 *
 *   return: status
 *
 *
 */
ssize_t
samp_write_module_file(struct file * fp, char *ubuf, size_t cnt, loff_t * pp)
{
    unsigned int rec_len, bytes_written, bytes_in_ubuf;
    char *pubuf;
    module_record *mra;

    bytes_in_ubuf = cnt;
    pubuf = ubuf;
    bytes_written = 0;
    while ((mra = get_next_mr_rec_from_out_list())) {
        rec_len = mra->rec_length;

        if (!mra->pid_rec_index_raw) {  // skip modules records that were created before a failed sys_call (eg fork, clone etc.)
            add_to_mr_buf_out_offset(rec_len);
            continue;
        }
        if (bytes_in_ubuf < rec_len) {
            break;
        }
        if (copy_to_user(pubuf, mra, rec_len)) {
            samp_dbg_print(KERN_INFO
                     "VTUNE: samp_write_module_file() copy_to_user failed \n");
            return -EFAULT;
        }
        add_to_mr_buf_out_offset(rec_len);

        bytes_written += rec_len;
        pubuf += rec_len;
        bytes_in_ubuf -= rec_len;
    }
    fp->f_pos += bytes_written;
    samp_dbg_print(KERN_INFO
             "VTUNE: samp_write_module_file() cnt %d bytes_written %d \n",
             cnt, bytes_written);
    return (bytes_written);
}

/*
 *
 *
 *   Function:  samp_write_pid_file
 *
 *   description:
 *
 *   Parms:
 *       entry:  *fp
 *               *ubuf
 *               cnt
 *               *pp
 *
 *   return: status
 *
 *
 */
ssize_t
samp_write_pid_file(struct file * fp, char *ubuf, size_t cnt, loff_t * pp)
{
    unsigned int rec_len, bytes_written, bytes_in_ubuf;
    char *pubuf;
    pid_record *pra;

    bytes_in_ubuf = cnt;
    pubuf = ubuf;
    bytes_written = 0;
    while ((pra = get_next_pid_rec_from_out_list())) {
        rec_len = sizeof (pid_record);

        if (bytes_in_ubuf < rec_len) {
            break;
        }
        if (copy_to_user(pubuf, pra, rec_len)) {
            samp_dbg_print(KERN_INFO
                     "VTUNE: samp_write_pid_file() copy_to_user failed \n");
            return -EFAULT;
        }
        add_to_pid_buf_out_offset(rec_len);

        bytes_written += rec_len;
        pubuf += rec_len;
        bytes_in_ubuf -= rec_len;
    }
    fp->f_pos += bytes_written;
    samp_dbg_print(KERN_INFO
             "VTUNE: samp_write_pid_file() cnt %d bytes_written %d \n", cnt,
             bytes_written);
    return (bytes_written);
}

/*
 *
 *
 *   Function:  vtune_read
 *
 *   description:
 *
 *   Parms:
 *       entry:  *filp
 *               *buf
 *               count
 *               *ppos
 *
 *       return: Size
 *
 *
 */
ssize_t
vtune_read(struct file * filp, char *buf, size_t count, loff_t * ppos)
{
    ssize_t n;
    struct inode *inode = filp->f_dentry->d_inode;

    samp_dbg_print(KERN_DEBUG
             "VTUNE: vtune_read() entered  current pid %d \n",
             current->pid);

#ifdef ENFORCE_ROOT_ONLY_ACCESS
    if (!capable(CAP_SYS_PTRACE))
      return -EPERM;
#endif

    //
    // Add semaphore here for one thread at a time
    //

    if (xchg(&b_stop_sampling, 0)) {
        stop_sampling(FALSE);
    }

    n = 0;
    switch (MINOR(inode->i_rdev)) {
    case PCMINOR:
        n = samp_write_pc_file(filp, buf, count, ppos);
        break;
    case MDMINOR:
        n = samp_write_module_file(filp, buf, count, ppos);
        if (!n && !(samp_info.flags & SINFO_STARTED)) {
            move_current_mr_buf_to_out_list();
            n = samp_write_module_file(filp, buf, count, ppos);
            if (!n) {
                free_mr_bufs();
            }
        }
        break;
    case PIDMINOR:
        n = samp_write_pid_file(filp, buf, count, ppos);
        if (!n && !(samp_info.flags & SINFO_STARTED)) {
            move_current_pid_buf_to_out_list();
            n = samp_write_pid_file(filp, buf, count, ppos);
            if (!n) {
                free_pid_bufs();
            }
        }
        break;
    default:
        break;
    }
    return (n);
}

/*
 *
 *
 *   Function:  vtune_release
 *
 *   description:
 *
 *   Parms:
 *       entry:  *inode
 *               *filp
 *
 *       return: status
 *
 *
 *
 */
int
vtune_release(struct inode *inode, struct file *filp)
{
    if (MINOR(inode->i_rdev) == 1)
        file_count--;

    return 0;
}

/*
 *
 *
 *   Function: vdrvgetsysinfo
 *
 *   description:
 *   This routines provides CPU info for local storage and
 *   export that will be placed into the TB3 file header. For
 *   local lookup we reference the kernel's exported
 *   "boot_cpu_data" struct. For export we execute the
 *   CPUID data and store it accordingly
 *
 *
 *   Parms:
 *       entry:  *callerinfo
 *               *system_cpu_data
 *
 *       return: status
 *
 *
 */

int
vdrvgetsysinfo(void)
{
    int iloop;

    memset(&vtune_sys_info, 0, sizeof (sys_samp_info));
    /* Determine CPUID and features */
#ifdef linux32
    g_this_CPUID = cpuid_eax(1);
    g_this_CPU_features = cpuid_edx(1);
    g_CPU_family = (char) ((g_this_CPUID & CPUFAMILY) >> 8);
#endif

#ifdef linux64
    g_this_CPUID = itp_get_cpuid(3);   // Itanium cpuid reg 3 - version information 39:32 arch rev, 31:24 family, 23:16 model, 15:8 rev, 7:0 largest cpuid index
    g_this_CPU_features = itp_get_cpuid(4);    //
    g_CPU_family = (__u32) ((g_this_CPUID & ITP_CPUID_REG3_FAMILY) >> 24);
#endif

#ifdef CONFIG_SMP
    /* Total CPUs detected */
    vtune_sys_info.num_processors = smp_num_cpus;
#else
    vtune_sys_info.num_processors = 1;
#endif

    /* Zero Out local storage */
    for (iloop = 0; iloop < MAX_PROCS; iloop++)
        vtune_sys_info.cpu_I_dmap[iloop] = 0;

#ifdef CONFIG_SMP
    /* Fill in cpu_ID info for all processors */
    for (iloop = 0; iloop < smp_num_cpus; iloop++)
        vtune_sys_info.cpu_I_dmap[iloop] = g_this_CPUID;
#else
    vtune_sys_info.cpu_I_dmap[0] = g_this_CPUID;
#endif

    /* Store additional CPU info */
    vtune_sys_info.cpu_feature_bits = g_this_CPU_features;

    vtune_sys_info.feature_set |=
        FSET_DRV_PROCESS_CREATES_TRACKED_IN_MODULE_RECS;

    /* Driver Version info */
    vtune_sys_info.sysdrv_version_major = SYS_VERSION_MAJOR;
    vtune_sys_info.sysdrv_version_minor = SYS_VERSION_MINOR;

    samp_dbg_print(KERN_DEBUG
             "VTUNE: vdrvgetsysinfo() CPUID: %08lx  features:  %08lx  Family: %X  numcpus %d \n",
             g_this_CPUID, g_this_CPU_features, g_CPU_family,
             vtune_sys_info.num_processors);

    return (0);
}

/*
 *
 *
 *   Function:  vtune_sampuserstop
 *
 *   description:
 *   This routine stops sampling, writes frees resources and 
 *   optionally copies the sampling information structure to the 
 *   callers buffer. If sampling mode is EBS we need to disable
 *   the APIC delivery INTR and signal the task wake up
 *   manually from here.
 *   
 *   Parms:
 *       entry:  out_buf  - pointer to caller's optional 
 *                         samp_info buffer
 *   
 *       return: status
 *
 *
 */
int
vtune_sampuserstop(void)
{
    samp_dbg_print(KERN_DEBUG "VTUNE: vtune_sampuserstop() pid %x entered \n",
             current->pid);
    stop_sampling(TRUE);

    return (0);
}

/*
 *
 *
 *   Function:   start_sampling6
 *
 *   description:
 *   Sets up sampling session. For TBS sampling  we arm the timer
 *   handler in the kernel patch. For EBS we call the specific
 *   setup routine to enable the performance counter overflow
 *   detect. Startup delay and interval timers are also initialized
 *   here.
 *
 *   Note:   This routine is now a merged source of the original
 *           Linux Config and Start Sampling functions.
 *
 *   Parms:
 *       entry:  samp_parm6
 *   
 *       return: status
 *
 *
 */
int
start_sampling6(samp_parm6 * sp6, int sp6_len)
{
    int i;
    unsigned long max_sample_time = 0;

    /* Check Sampling State */
    if ((samp_info.sampling_active == TRUE)
        || (samp_info.
        flags & (SINFO_STARTED | SINFO_STOPPING | SINFO_WRITE)))
        return (-EBUSY);

    /* Configure for SP6 Parms */
    i = samp_configure6(sp6, sp6_len);
    if (i)
        return (i);

    /* Xfer current SP6 to global store */
    memcpy(&samp_parms, sp6, sizeof (samp_parm6));

    /* Save sample rate in microseconds and milliseconds. Set default if necessary. */
    if (!sample_rate_us)
        sample_rate_us = 1000;


    if (sample_method & METHOD_EBS) {
        memset(package_status, 0, sizeof (package_status));
        memset(processor_status, 0, sizeof (processor_status));
        memset(processor_EBS_status, 0, sizeof (processor_EBS_status));
#ifdef linux32
        //
        // Validate EBS regs
        //
        if (validate_EBS_regs()) {
            // return (STATUS_INVALID_PARAMETER);
        }
        //
        // Make sure apic is enabled. This fixes "no samples" problem after 
        // going into and out of "standby" mode on laptop 
        //
        SAMP_Set_Apic_Virtual_Wire_Mode();
#endif
    }

    /* Sanity check to we cannot go lower than 1ms */
    sample_rate_ms = sample_rate_us / 1000;
    sample_rate_ms = (sample_rate_ms >= 1) ? sample_rate_ms : 1;

    samp_info.sample_time = 0;
    samp_info.profile_ints_idt = 0;
    samp_info.profile_ints = 0;
    samp_info.sample_count = 0;
    samp_info.test_IO_errs = 0;
    current_buffer_count = 0;
    sample_max_samples = samp_parms.maximum_samples;

    pdsa->sample_session_count++;   //                     06-25-97
    pdsa->sample_count = 0;
    pdsa->sample_skip_count = 0;    //                     01-15-99
    pdsa->suspend_start_count = 0;  //                     01-15-99
    pdsa->suspend_end_count = 0;    //                     01-15-99

    pdsa->duration = 0;
    pdsa->tot_samples = 0;
    pdsa->tot_idt_ints = 0;
    pdsa->tot_profile_ints = 0;
    pdsa->tot_skip_samp = 0;


    pebs_err = 0;       // clear count              05-31-00 WMT

    /* Remove any previous allocations before initializing sample buffer. */
    if (buf_start != NULL)
        vfree(buf_start);

    /* Make sure to check/set default buffer size */
    if (!samp_parms.samps_per_buffer)
        samp_parms.samps_per_buffer = 1000;

    /* Allocate sample buffer per user requested size and initialize. 
     * For safeguard of memory boundary issues with vmalloc() we alloc
     * extra memory for sampling headroom. buffer size should match
     * copy_to_user() recipient in user mode. */
    buf_length = samp_parms.samps_per_buffer * sizeof (sample_record_PC);
    samp_dbg_print(KERN_DEBUG
             "VTUNE: start_sampling6()   samps_per_buffer %d  buf_length %d \n",
             samp_parms.samps_per_buffer, buf_length);
    buf_start = (void *) vmalloc(buf_length);

    /* Check for allocation success */
    if (!buf_start) {
        return (-ENOMEM);
    }

    /* Initialize buffer Contents */
    memset((char *) buf_start, 0, buf_length);


    /* Init Sampling Stats - Sanity Check */
    samp_info.flags = SINFO_STOP_COMPLETE;
    samp_info.sampling_active = FALSE;
    signal_thread_event = FALSE;
    samp_info.sample_count = 0;

    /* Initialize Sampling buffer */
    buf_end = (void *) ((unsigned long) buf_start + buf_length);
    p_sample_buf = buf_start;
    memset((char *) buf_start, 0, buf_length);
    current_buffer_count = 0;

    /* Initialize Module load info */
    num_mod_rec = 0;
    num_pid_create_rec = 0;
    // The following pointer should also be initialized!
    g_MR_pointer_head = NULL;

    /* Reset Timer status */
    if (g_start_delay_timer_ON) {
        g_start_delay_timer_ON = FALSE;
        del_timer(&delay_tmr);
    }
    if (g_max_samp_timer_ON) {
        g_max_samp_timer_ON = FALSE;
        del_timer(&time_out_tmr);
    }

    /* Start delay Timer */
    if (samp_parms.start_delay) {
        g_start_delay_timer_ON = TRUE;
        init_timer(&delay_tmr);
        delay_tmr.expires = jiffies + samp_parms.start_delay * HZ;
        delay_tmr.function = (void (*)(unsigned long)) samp_start_delay;
        add_timer(&delay_tmr);
        interruptible_sleep_on(&samp_delay);
    }

    /* Set max Sampling Time */
    b_stop_sampling = FALSE;
    restart_EBS = FALSE;
    if (samp_parms.max_interval) {
        g_max_samp_timer_ON = TRUE;
        max_sample_time = samp_parms.max_interval * HZ;
        init_timer(&time_out_tmr);
        time_out_tmr.function =
            (void (*)(unsigned long)) samp_max_sample_time_expired;
        time_out_tmr.expires = jiffies + max_sample_time;
        add_timer(&time_out_tmr);
    }

    samp_dbg_print(KERN_INFO "VTUNE: sampling started pid %x \n",
             current->pid);

    //
    // Preallocate buffers to track system-wide module loads           
    //
    mgid = 0;
    p_current_mr_buf = 0;
    p_mr_buf_out = 0;
    mr_buf_out_offset = 0;
    free_mr_bufs();
    free_pid_bufs();

    samp_info.flags = SINFO_STARTED;
    samp_info.sampling_active = TRUE;

    /* Kick off all counters on each CPU */
    for (i = 0; i < MAX_PROCS; i++) {
        start_all[i] = TRUE;
    }

    /* Track start time for elapsed measurement */
    start_time = jiffies;

    total_loads = total_loads_init = total_pid_updates = 0; // Debug Code

    track_module_loads = track_process_creates = FALSE;
    if (!samp_parms.calibration) {
        for (i = 0; i < 4; i++) {
            add_mr_buf_to_free_list(alloc_new_mr_buf());
        }
        for (i = 0; i < 4; i++) {
            add_pid_buf_to_free_list(alloc_new_pid_buf());
        }
        enum_user_mode_modules();
        total_loads_init = total_loads; // Debug Code
        track_module_loads = track_process_creates = TRUE;
    }

    if (sample_method & METHOD_EBS) {
#ifdef linux64
        set_PP_SW();
        install_perf_isr();
#endif
#ifdef linux32
        vdrv_init_emon_regs();
#endif
        vdrv_start_EBS();
    }

    return (0);
}

/*
 *
 *
 *   Function:  samp_start_delay
 *
 *   description:
 *   Wake up timer delay handler to start sampling.
 *
 *   Parms:
 *       entry:  ptr
 *   
 *       return: None
 *
 *
 */
void
samp_start_delay(unsigned long ptr)
{
    /* Wake up - Start Sampling! */
    g_start_delay_timer_ON = FALSE;
    wake_up_interruptible(&samp_delay);

    return;
}

/*
 *
 *
 *   Function:  samp_max_sample_time_expired
 *
 *   description: 
 *   Terminates sampling from user timeout spec,
 *
 *   Parms:
 *       entry:  ptr         - pointer to caller's option
 *                             samp_info buffer
 *   
 *       return: None
 *
 *
 */
void
samp_max_sample_time_expired(unsigned long ptr)
{
    g_max_samp_timer_ON = FALSE;
    xchg(&b_stop_sampling, 1);
    wake_up_interruptible(&pc_write);

    return;
}

/*
 *
 *
 *   Function:  samp_get_stats
 *
 *   description:
 *   This routine copies the sampling information structure
 *   to the callers buffer.
 *
 *   Parms:
 *       entry:  out_buf       - pointer to caller's optional 
 *                              samp_info buffer
 *   
 *       return: status
 *
 *
 */
int
samp_get_stats(sampinfo_t * out_buf)
{
    if (out_buf)
        copy_to_user(out_buf, &samp_info, sizeof (sampinfo_t));

    return (0);
}

/*
 *
 *
 *   Function:  samp_get_parm
 *
 *   description:
 *   Retrieves the current sampling parameters/configuration 
 *   contained in samp_parms.
 *
 *   Parms:
 *       entry:  out_buf      - pointer to caller's optional 
 *                             samp_info buffer
 *
 *       return: status  -  OK
 *       
 *
 */
int
samp_get_parm(samp_parm3 * out_buf)
{
    if (out_buf)
        copy_to_user(out_buf, &samp_parms, sizeof (samp_parm3));

    return (0);
}

/*
******************************************************************************
                              E B S   S A M P L I N G
******************************************************************************
*/

/*
 *
 *   Function:  ebs_intr
 *
 *   description:
 *   Event-based sampling interrupt handler. 
 *
 *   Parms:
 *       entry:  *regs   -Currenct CPU register state.
 *   
 *       return: None
 *
 */
#ifdef linux32
asmlinkage void
ebs_intr(struct pt_regs *regs)
#endif
#ifdef linux64
void
ebs_intr(int irq, void *arg, struct pt_regs *regs)
#endif
{
    INT_FRAME int_frame;
    u32 wake_up_thread;

#ifdef linux32
    samp_stop_emon();
    int_frame.seg_cs = regs->xcs;
    int_frame.eip = regs->eip;
    int_frame.E_flags = regs->eflags;

#elif linux64
    int_frame.iip.quad_part = regs->cr_iip;
    int_frame.ipsr.quad_part = regs->cr_ipsr;
    if (int_frame.ipsr.quad_part & IA64_PSR_IS) {   // check addressing mode at time of profile int (Itanium or IA32)
        unsigned long eflag, csd;

        asm("mov %0=ar.eflag;"  // get IA32 eflags
            "mov %1=ar.csd;"    // get IAA32 unscrambled code segment descriptor
          :    "=r"(eflag), "=r"(csd));
        int_frame.E_flags = (__u32) eflag;
        int_frame.csd = csd;
        int_frame.seg_cs = (__u32) regs->r17;
    }
#endif

    atomic_inc(&samp_isr_active);
    wake_up_thread = samp_emon_interrupt(&int_frame);

    //
    // If time to signal thread then call wake_up_interruptible() to
    // handle the "wake_up_thread" condition.
    //
    if (wake_up_thread) {
        if (samp_info.flags & SINFO_DO_STOP) {
            xchg(&b_stop_sampling, 1);
        }
        wake_up_interruptible(&pc_write);
    }

    if (samp_info.sampling_active) {
        samp_start_emon(FALSE);
    }

    atomic_dec(&samp_isr_active);

    return;
}

/*
 *
 *
 *   Function:  vdrv_start_EBS
 *
 *   description:
 *   Dispatches control to activate sampling on all CPUs.
 *
 *   Parms:
 *       entry:  Dispatch Function
 *               NULL
 *               1
 *               0
 *
 *       return: status
 *
 *
 */
void
vdrv_init_emon_regs(void)
{

#ifdef linux32
#ifdef CONFIG_SMP
    smp_call_function(samp_init_emon_regs, NULL, 1, 1); // wait=TRUE to wait for function to complete on other cpu's
#endif

    samp_init_emon_regs(NULL);  //
#endif

    return;
}

/*
 *
 *
 *   Function:  vdrv_start_EBS
 *
 *   description:
 *   Dispatches control to activate sampling on all CPUs.
 *
 *   Parms:
 *       entry:  Dispatch Function
 *               NULL
 *               1
 *               0
 *
 *       return: status
 *
 *
 */
int
vdrv_start_EBS(void)
{

#ifdef CONFIG_SMP
    smp_call_function(samp_start_profile_interrupt, NULL, 1, 0);
#endif

    samp_start_profile_interrupt(NULL);

    return (0);
}

/*
 *
 *   Function:  vdrv_resume_EBS
 *
 *   description:
 *   Dispatches control to resume sampling on all CPUs.
 *
 *   Parms:
 *       entry:  Dispatch Function
 *               NULL
 *               1
 *               0
 *
 *       return: status
 *
 */
int
vdrv_resume_EBS(void)
{

#ifdef CONFIG_SMP
    smp_call_function(samp_start_emon, (void *) 1, 1, 0);    // call samp_start_emon() with do_start=TRUE
#endif

    samp_start_emon((void *) 1);

    return (0);
}

/*
 *
 *   Function:  vdrv_stop_EBS
 *
 *   description:
 *   Dispatches control to deactivate sampling on all CPUs.
 *
 *   Parms:
 *       entry:  Dispatch Function
 *               NULL
 *               1
 *               0
 *
 *       return: None
 *
 */
void
vdrv_stop_EBS(void)
{

#ifdef CONFIG_SMP
    smp_call_function(samp_stop_profile_interrupt, NULL, 1, 1);     // wait=TRUE
#endif

    samp_stop_profile_interrupt(NULL);
    return;
}

/*
******************************************************************************
                    M O D U L E / P R O C E S S    T R A C K I N G
******************************************************************************
*/

void
alloc_module_group_ID(PMGID_INFO pmgid_info)
{
    if (!pmgid_info) {
        return;
    }

    memset(pmgid_info, 0, sizeof (MGID_INFO));
    spin_lock(&mgid_lock);
    mgid++;
    pmgid_info->mgid = mgid;
    spin_unlock(&mgid_lock);

    return;
}

PMR_BUF
get_next_mr_buf_on_out_list(PMR_BUF pmr_buf)
{
    __u32 i;
    LIST_ENTRY *p_list;

    if (!pmr_buf) {
        if (p_mr_buf_out) {
            return (p_mr_buf_out);
        }
    }

    i = (pmr_buf) ? 0 : 1;
    list_for_each(p_list, &mr_buf_out_list_head) {
        if (i) {
            return ((PMR_BUF) p_list);
        }
        if (p_list == &pmr_buf->link) {
            i++;
        }
    }
    return (0);
}

void
update_pid_for_module_group(__u32 pid, PMGID_INFO pmgid_info)
{
    __u32 mr_mgid, first_mr_found, last_mr_found;
    void_ptr mr_first, mr_last, pbuf, pbuf_end;
    PMR_BUF pmr_buf;
    module_record *mra;

    if (!(samp_info.flags & SINFO_STARTED) || !pmgid_info) {
        return;
    }

    total_pid_updates++;    // Debug Code

    samp_dbg_print(KERN_INFO
             "VTUNE: update_pid_for_module_group  pid %x mgid %d mrfirst %x mr_last %x \n",
             pid, pmgid_info->mgid, pmgid_info->mr_first,
             pmgid_info->mr_last);

    spin_lock(&vtune_modlist_lock);
    mr_first = pmgid_info->mr_first;
    mr_last = pmgid_info->mr_last;
    mr_mgid = pmgid_info->mgid;

    for (;;) {
        if ((mr_first == 0) || (mr_last == 0)) {
            mr_first = mr_last = 0;
        }
        //
        // Look in current module buffer for module records in module group.
        //
        first_mr_found = last_mr_found = 0;
        pmr_buf = p_current_mr_buf;
        samp_dbg_print(KERN_INFO "VTUNE: Update... p_current_mr_buf %x \n",
                 p_current_mr_buf);
        if (pmr_buf) {
            pbuf = &pmr_buf->buf;
            pbuf_end = pbuf + pmr_buf->bytes_in_buf;
            if ((mr_first >= pbuf) && (mr_first <= pbuf_end)) {
                first_mr_found = TRUE;
                pbuf = mr_first;
            }
            if ((mr_last >= pbuf) && (mr_last < pbuf_end)) {
                pbuf_end = mr_last + 1;
            }
            for (; pbuf < pbuf_end; pbuf += mra->rec_length) {
                mra = (module_record *) pbuf;
                samp_dbg_print(KERN_INFO
                         "VTUNE: update_pid_for_module_group  checking record mra %x pid_rec_index %d \n",
                         mra, mra->pid_rec_index);
                if (!mra->pid_rec_index_raw
                    && (mra->pid_rec_index == mr_mgid)) {
                    mra->pid_rec_index = pid;
                    mra->pid_rec_index_raw = 1;
                    samp_dbg_print(KERN_INFO
                             "VTUNE: update_pid_for_module_group  module record updated pid %x mgid %d mra %x \n",
                             pid, pmgid_info->mgid, mra);
                }
            }
            if (first_mr_found) {
                break;
            }
        }
        //
        // Scan module record buffers on output list from oldest to newest.
        // Update pid for each module record in the module group.
        //
        pmr_buf = 0;
        first_mr_found = last_mr_found = FALSE;
        while ((pmr_buf = get_next_mr_buf_on_out_list(pmr_buf))) {
            samp_dbg_print(KERN_INFO
                     "VTUNE: Update... 2 pmr_buf %x \n", pmr_buf);
            pbuf = &pmr_buf->buf;
            pbuf_end = pbuf + pmr_buf->bytes_in_buf;
            if (!first_mr_found) {
                if ((mr_first >= pbuf) && (mr_first <= pbuf_end)) {
                    first_mr_found = TRUE;
                    pbuf = mr_first;
                } else {
                    if (mr_first) {
                        continue;   // if mr_first specified, then skip to next (newer) mr buffer
                    }
                }
            }
            if ((mr_last >= pbuf) && (mr_last < pbuf_end)) {
                pbuf_end = mr_last + 1;
                last_mr_found = TRUE;
            }
            for (; pbuf < pbuf_end; pbuf += mra->rec_length) {
                mra = (module_record *) pbuf;
                samp_dbg_print(KERN_INFO
                         "VTUNE: update_pid_for_module_group 2 checking record mra %x pid_rec_index %d \n",
                         mra, mra->pid_rec_index);
                if (!mra->pid_rec_index_raw
                    && mra->pid_rec_index == mr_mgid) {
                    mra->pid_rec_index = pid;
                    mra->pid_rec_index_raw = 1;
                    samp_dbg_print(KERN_INFO
                             "VTUNE: update_pid_for_module_group 2  module record updated pid %x mgid %d mra %x \n",
                             pid, pmgid_info->mgid, mra);
                }
            }
            if (last_mr_found) {
                break;
            }
        }
        break;
    }
    spin_unlock(&vtune_modlist_lock);
    return;
}

#define insert_tail_list(a,b) list_add_tail(b, a)

PLIST_ENTRY
remove_head_list(PLIST_ENTRY entry)
{
    PLIST_ENTRY poped_entry = NULL;

    if (entry->next && (entry->next != entry)) {
        poped_entry = entry->next;
        list_del(poped_entry);
    }
    return (poped_entry);
}

PMR_BUF
pop_mr_buf_from_head_of_free_list(void)
{
    PLIST_ENTRY entry;

    entry = remove_head_list(&mr_buf_free_list_head);
    if (entry == &mr_buf_free_list_head) {
        entry = NULL;
    }
    return ((PMR_BUF) entry);
}

PMR_BUF
pop_mr_buf_from_head_of_out_list(void)
{
    PLIST_ENTRY entry;

    entry = remove_head_list(&mr_buf_out_list_head);
    if (entry == &mr_buf_out_list_head) {
        entry = NULL;
    }
    return ((PMR_BUF) entry);
}

void
add_mr_buf_to_out_list(PMR_BUF pmr_buf)
{
    insert_tail_list(&mr_buf_out_list_head, &pmr_buf->link);
    return;
}

void
init_mr_buf(PMR_BUF pmr_buf)
{
    if (pmr_buf) {
        memset(pmr_buf, 0, sizeof (MR_BUF));
        pmr_buf->buf_size = MR_BUFFER_DATA_SIZE;
    }
    return;
}

PMR_BUF
alloc_new_mr_buf(void)
{
    PMR_BUF pmr_buf;

    pmr_buf = (PMR_BUF) allocate_pool(non_paged_pool, sizeof (MR_BUF));
    if (pmr_buf) {
        init_mr_buf(pmr_buf);
        samp_dbg_print(KERN_INFO "VTUNE:TBS: segs buffer allocated. addr %x \n",    // 07-11-97
                 pmr_buf);  // 07-11-97
    } else {
        samp_dbg_print(KERN_INFO "VTUNE:TBS: segs buffer alloc failed.  \n",    // 07-11-97
                 pmr_buf);  // 07-11-97
    }
    return (pmr_buf);
}

void
free_mr_bufs(void)
{
    PMR_BUF pmr_buf;

    while ((pmr_buf = pop_mr_buf_from_head_of_free_list())) {
        free_pool(pmr_buf);
        samp_dbg_print(KERN_INFO
                 "VTUNE:TBS: segs buffer freed from free list %x \n",
                 pmr_buf);
    }

    while ((pmr_buf = pop_mr_buf_from_head_of_out_list())) {
        free_pool(pmr_buf);
        samp_dbg_print(KERN_INFO
                 "VTUNE:TBS: segs buffer freed from out list  %x \n",
                 pmr_buf);
    }
    return;
}

void
add_mr_buf_to_free_list(PMR_BUF pmr_buf)
{
    if (pmr_buf) {
        init_mr_buf(pmr_buf);
        insert_tail_list(&mr_buf_free_list_head, &pmr_buf->link);
    }
    return;
}

PMR_BUF
get_current_mr_buf(void)
{
    if (!p_current_mr_buf) {
        if (!(p_current_mr_buf = pop_mr_buf_from_head_of_free_list())) {
            p_current_mr_buf = alloc_new_mr_buf();
        }
    }
    return (p_current_mr_buf);
}

void
move_current_mr_buf_to_out_list(void)
{
    PMR_BUF pmr_buf;

    pmr_buf = p_current_mr_buf;
    p_current_mr_buf = 0;
    if (pmr_buf)
        add_mr_buf_to_out_list(pmr_buf);
    
    return;
}

void_ptr
copy_to_mr_buf(void_ptr pdata, __u32 size)
{
    void_ptr pmr;
    PMR_BUF pmr_buf;

    if (!(pmr_buf = get_current_mr_buf())) {
        return (0);
    }

    if ((pmr_buf->bytes_in_buf + size) > pmr_buf->buf_size) {   // will data fit in buffer?
        move_current_mr_buf_to_out_list();  // ..no
        return (copy_to_mr_buf(pdata, size));   // try again
    }

    pmr = &pmr_buf->buf;
    pmr += pmr_buf->bytes_in_buf;
    memcpy(pmr, pdata, size);
    pmr_buf->bytes_in_buf += size;

    if (pmr_buf->bytes_in_buf == pmr_buf->buf_size) {
        move_current_mr_buf_to_out_list();
    }

    return (pmr);
}

void
add_to_mr_buf_out_offset(__u32 rec_length)
{
    mr_buf_out_offset += rec_length;
    return;
}

module_record *
get_next_mr_rec_from_out_list(void)
{
    PMR_BUF pmr_buf;
    module_record *pmr = 0;

    //
    // return address of next module record in 
    //
    if (p_mr_buf_out) {
        if (p_mr_buf_out->bytes_in_buf) {
            pmr = (module_record *) & p_mr_buf_out->buf[mr_buf_out_offset];
            // mr_buf_out_offset += pmr->rec_length;
            if (mr_buf_out_offset < p_mr_buf_out->bytes_in_buf) {
                return ((module_record *) & p_mr_buf_out->
                    buf[mr_buf_out_offset]);
            }
        }
        pmr = 0;
        pmr_buf = xchg(&p_mr_buf_out, 0);
        mr_buf_out_offset = 0;
        add_mr_buf_to_free_list(pmr_buf);
    }

    pmr_buf = pop_mr_buf_from_head_of_out_list();
    if (pmr_buf) {
        p_mr_buf_out = pmr_buf;
        mr_buf_out_offset = 0;
        if (p_mr_buf_out->bytes_in_buf) {
            return ((module_record *) & p_mr_buf_out->buf);
        }
    }
    return (0);
}

//
// Routines to manage pid record buffers
//

PPID_BUF
pop_pid_buf_from_head_of_free_list(void)
{
    PLIST_ENTRY entry;

    entry = remove_head_list(&pid_buf_free_list_head);
    if (entry == &pid_buf_free_list_head) {
        entry = NULL;
    }
    return ((PPID_BUF) entry);
}

PPID_BUF
pop_pid_buf_from_head_of_out_list(void)
{
    PLIST_ENTRY entry;

    entry = remove_head_list(&pid_buf_out_list_head);
    if (entry == &pid_buf_out_list_head) {
        entry = NULL;
    }
    return ((PPID_BUF) entry);
}

void
add_pid_buf_to_out_list(PPID_BUF p_pid_buf)
{
    insert_tail_list(&pid_buf_out_list_head, &p_pid_buf->link);
    return;
}

void
init_pid_buf(PPID_BUF p_pid_buf)
{
    if (p_pid_buf) {
        memset(p_pid_buf, 0, sizeof (MR_BUF));
        p_pid_buf->buf_size = MR_BUFFER_DATA_SIZE;
    }
    return;
}

PPID_BUF
alloc_new_pid_buf(void)
{
    PPID_BUF p_pid_buf;

    p_pid_buf = (PPID_BUF) allocate_pool(non_paged_pool, sizeof (MR_BUF));
    if (p_pid_buf) {
        init_pid_buf(p_pid_buf);
        samp_dbg_print(KERN_INFO "VTUNE:TBS: pid  buffer allocated. addr %x \n",    // 07-11-97
                 p_pid_buf);    // 07-11-97
    } else {
        samp_dbg_print(KERN_INFO "VTUNE:TBS: pid  buffer alloc failed.  \n",    // 07-11-97
                 p_pid_buf);    // 07-11-97
    }
    return (p_pid_buf);
}

void
free_pid_bufs(void)
{
    PPID_BUF p_pid_buf;

    while ((p_pid_buf = pop_pid_buf_from_head_of_free_list())) {
        free_pool(p_pid_buf);
        samp_dbg_print(KERN_INFO
                 "VTUNE:TBS: pid  buffer freed from free list %x \n",
                 p_pid_buf);
    }

    while ((p_pid_buf = pop_pid_buf_from_head_of_out_list())) {
        free_pool(p_pid_buf);
        samp_dbg_print(KERN_INFO
                 "VTUNE:TBS: pid  buffer freed from free out  %x \n",
                 p_pid_buf);
    }
    return;
}

void
add_pid_buf_to_free_list(PPID_BUF p_pid_buf)
{
    if (p_pid_buf) {
        init_pid_buf(p_pid_buf);
        insert_tail_list(&pid_buf_free_list_head, &p_pid_buf->link);
    }
    return;
}

PPID_BUF
get_current_pid_buf(void)
{
    if (!p_current_pid_buf) {
        if (!(p_current_pid_buf = pop_pid_buf_from_head_of_free_list())) {
            p_current_pid_buf = alloc_new_pid_buf();
        }
    }
    return (p_current_pid_buf);
}

void
move_current_pid_buf_to_out_list(void)
{
    PPID_BUF p_pid_buf;

    p_pid_buf = p_current_pid_buf;
    p_current_pid_buf = 0;
    if (p_pid_buf) {
        add_pid_buf_to_out_list(p_pid_buf);
        /*  Not currently used on Linux
           KeSetEvent(&samp_threadEvent,     // signal sampler thread
           (KPRIORITY) 0,
           FALSE);
         */
    }
    return;
}

BOOLEAN
copy_to_pid_buf(void_ptr pdata, __u32 size)
{
    void_ptr i;
    PPID_BUF p_pid_buf;

    if (!(p_pid_buf = get_current_pid_buf())) {
        return (FALSE);
    }

    if ((p_pid_buf->bytes_in_buf + size) > p_pid_buf->buf_size) {   // will data fit in buffer?
        move_current_pid_buf_to_out_list(); // ..no
        return (copy_to_pid_buf(pdata, size));  // try again
    }

    i = &p_pid_buf->buf;
    i += p_pid_buf->bytes_in_buf;
    memcpy(i, pdata, size);
    p_pid_buf->bytes_in_buf += size;

    if (p_pid_buf->bytes_in_buf == p_pid_buf->buf_size) {
        move_current_pid_buf_to_out_list();
    }

    return (TRUE);
}

void
add_to_pid_buf_out_offset(__u32 rec_length)
{
    pid_buf_out_offset += rec_length;
    return;
}

pid_record *
get_next_pid_rec_from_out_list(void)
{
    PPID_BUF p_pid_buf;
    pid_record *ppr = 0;

    //
    // return address of next pid record in 
    //
    if (p_pid_buf_out) {
        if (p_pid_buf_out->bytes_in_buf) {
            ppr = (pid_record *) & p_pid_buf_out->buf[pid_buf_out_offset];
            // mr_buf_out_offset += pmr->rec_length;
            if (pid_buf_out_offset < p_pid_buf_out->bytes_in_buf) {
                return ((pid_record *) & p_pid_buf_out->
                    buf[pid_buf_out_offset]);
            }
        }
        ppr = 0;
        p_pid_buf = xchg(&p_pid_buf_out, 0);
        pid_buf_out_offset = 0;
        add_pid_buf_to_free_list(p_pid_buf);
    }

    p_pid_buf = pop_pid_buf_from_head_of_out_list();
    if (p_pid_buf) {
        p_pid_buf_out = p_pid_buf;
        pid_buf_out_offset = 0;
        if (p_pid_buf_out->bytes_in_buf) {
            return ((pid_record *) & p_pid_buf_out->buf);
        }
    }
    return (0);
}

int
samp_load_image_notify_routine(char *name, __u32_PTR base,__u32_PTR size,
__u32 pid, __u32 options, PMGID_INFO pmgid_info) 
{
    void_ptr p_mr_rec;
    char *raw_path;
    module_record *mra;
    char buf[sizeof (module_record) + MAXNAMELEN + 32];

    if ((samp_info.flags & SINFO_STARTED)
        && !(samp_info.flags & SINFO_STOPPING)) {

        total_loads++;  // Debug code

        mra = (module_record *) buf;
        raw_path = (char *) ((__u32_PTR) mra + sizeof (module_record));

        memset(mra, 0, sizeof (module_record));

#ifdef linux32
        mra->segment_type = (unsigned short) MODE_32BIT;
        if (pid == 0)
            mra->selector = __KERNEL_CS;
        else
            mra->selector = __USER_CS;
#elif linux64
        mra->segment_type = (unsigned short) MODE_64BIT;
#endif
        mra->load_addr64.quad_part = (__u32_PTR) base;
        mra->length64.quad_part = size;
        mra->global_module_Tb5 = options & LOPTS_GLOBAL_MODULE;
        mra->first_module_rec_in_process = options & LOPTS_1ST_MODREC;
        mra->unload_sample_count = 0xFFFFFFFF;
        if (pmgid_info && pmgid_info->mgid) {
            mra->pid_rec_index = pmgid_info->mgid;
        } else {
            mra->pid_rec_index = pid;
            mra->pid_rec_index_raw = 1; // raw pid
#ifdef DEBUG            
               if (total_loads_init) {
                   SampPrint(KERN_INFO "VTUNE:TBS: setting pid_rec_index_raw pid 0x%x %s \n", pid, name);
               }
#endif          
        }
        strncpy(raw_path, name, MAXNAMELEN);
        raw_path[MAXNAMELEN] = 0;
        mra->path_length = (__u16) strlen(raw_path) + 1;
        mra->rec_length =
            (__u16) ((sizeof (module_record) + mra->path_length + 7) & ~7);

        //
        // See if this module is the Exe for the process
        // 10/21/97 
        //
        if ((mra->load_addr64.quad_part >= app_base_low) &&
            (mra->load_addr64.quad_part < app_base_high))
            mra->exe = 1;
        else
            mra->exe = 0;

        spin_lock(&vtune_modlist_lock);
        mra->load_sample_count = pdsa->sample_count;    // get sample count while lock is held
        // ..to ensure modules records are in sample count
        // ..oreder in the sample file (.tb5 file)
        p_mr_rec = copy_to_mr_buf(mra, mra->rec_length);

        if (p_mr_rec && pmgid_info && pmgid_info->mgid) {
            if (!pmgid_info->mr_first) {
                pmgid_info->mr_first = pmgid_info->mr_last = p_mr_rec;  // save address of module record in module group
            } else {
                pmgid_info->mr_last = p_mr_rec;
            }
        }
        spin_unlock(&vtune_modlist_lock);
    }

    return (STATUS_SUCCESS);
}

//
// create Process Notify Routine
//
void
samp_create_process_notify_routine(__u32 parent_id, __u32 process_id, __u32 create)
{
    char buf[sizeof (pid_record) + 16]; // pid record + some for roundup
    pid_record *pr;

    if (create) {
        samp_dbg_print(KERN_INFO
                 "VTUNE:TBS: create process..... pid %4x \n",
                 process_id);
    } else {
        samp_dbg_print(KERN_INFO
                 "VTUNE:TBS: terminate process.. pid %4x \n",
                 process_id);
        // FreeProcessinfo(process_id);
        // don't need to track terminates since we deduce terminates
        // from creates 
        return;

    }

    if ((samp_info.flags & SINFO_STARTED)
        && !(samp_info.flags & SINFO_STOPPING)) {

        pr = (pid_record *) buf;
        memset(pr, 0, sizeof (pid_record));
        pr->rec_length = (sizeof (pid_record) + 7) & ~7;    // mult of 8
        pr->pid_event = (create) ? 0 : 1;   // pid_event = 0 = process create, 1 = process terminate
        pr->os_pid = (__u32) process_id;
        pr->sample_count = pdsa->sample_count;
        pr->sample_count_term = 0xFFFFFFFF;
        pr->path_length = 0;
        pr->filename_offset = 0;

        spin_lock(&pidlist_lock);

        copy_to_pid_buf(pr, pr->rec_length);

        spin_unlock(&pidlist_lock);
    }

    return;
}

#ifndef OLDER_KERNEL_SUPPORT
MODULE_LICENSE("GPL");
#endif
EXPORT_NO_SYMBOLS;
/*
******************************************************************************
                    S Y S T E M    I N T E R F A C E
******************************************************************************
*/
