/*!
 * @fn       unc_power_hsw_Read_PMU_Data(param)
 *
 * @param    param    The read thread node to process
 * @param    id       The event id for which the sample is generated
 *
 * @return   None     No return needed
 *
 * @brief    Read the Uncore count data and store into the buffer param;
 *           Uncore PMU does not support sampling, i.e. ignore the id parameter.
 */
static VOID
unc_power_hsw_Read_PMU_Data (
    PVOID  param
)
{
    S32                   j;
    U64                  *buffer             = read_unc_ctr_info;
    U32                   dev_idx            = *((U32*)param);
    U32                   start_index;
    DRV_CONFIG            pcfg_unc;
    U32                   this_cpu           = CONTROL_THIS_CPU();
    CPU_STATE             pcpu               = &pcb[this_cpu];
    U32                   num_cpus           = GLOBAL_STATE_num_cpus(driver_state);
    U32                   thread_event_count = 0;

    pcfg_unc    = (DRV_CONFIG)LWPMU_DEVICE_pcfg(&devices[dev_idx]);
    start_index = DRV_CONFIG_emon_unc_offset(pcfg_unc, 0);

    FOR_EACH_DATA_REG_UNC(pecb, dev_idx, i) {
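        // Package-scope events get a single slot, read only by the socket master;
        // thread-scope events get one slot per logical CPU, offset by this_cpu below.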
        if (ECB_entries_event_scope(pecb,i) == PACKAGE_EVENT) {
            j = start_index + thread_event_count*(num_cpus-1) + ECB_entries_group_index(pecb,i) + ECB_entries_emon_event_id_index_local(pecb,i);
            if (!CPU_STATE_socket_master(pcpu)) {
                continue;
            }
        }
        else {
            j = start_index + this_cpu + thread_event_count*(num_cpus-1) + ECB_entries_group_index(pecb,i) + ECB_entries_emon_event_id_index_local(pecb,i);
            thread_event_count++;
        }
        buffer[j] = SYS_Read_MSR(ECB_entries_reg_id(pecb,i));
    } END_FOR_EACH_DATA_REG_UNC;

    return;
}
/*!
 * @fn void cpumon_Save_Cpu(param)
 *
 * @param  param - Unused, set up to enable parallel calls
 *
 * @return None     No return needed
 *
 * @brief  Save the old handler for restoration when done
 *
 */
static VOID 
cpumon_Save_Cpu (
    PVOID parm
)
{
    unsigned long        eflags;
    IDTGDT_DESC          idt_base;
    CPU_STATE            pcpu = &pcb[CONTROL_THIS_CPU()];
    GATE_STRUCT          old_gate;
    GATE_STRUCT         *idt;

    SYS_Local_Irq_Save(eflags);
    SYS_Get_IDT_Base((PVOID*)&idt_base);
    idt  = idt_base.idtgdt_base;

    CPU_STATE_idt_base(pcpu) = idt;
    memcpy (&old_gate, &idt[CPU_PERF_VECTOR], 16);
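    // An x86_64 IDT entry is a 16-byte gate descriptor; the handler address is split
    // across offset_low/offset_middle/offset_high and is reassembled below.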

    CPU_STATE_saved_ih(pcpu)  = (PVOID) ((((U64) old_gate.offset_high) << 32)   | 
                                         (((U64) old_gate.offset_middle) << 16) | 
                                          ((U64) old_gate.offset_low));
 
    SEP_PRINT_DEBUG("saved_ih is 0x%llx\n", CPU_STATE_saved_ih(pcpu));
    SYS_Local_Irq_Restore(eflags);

    return;
}
/*!
 * @fn void silvermont_Write_PMU(param)
 *
 * @param    param    dummy parameter which is not used
 *
 * @return   None     No return needed
 *
 * @brief    Initial set up of the PMU registers
 *
 * <I>Special Notes</I>
 *         Initial write of PMU registers.
 *         Walk through the entries and write the value of each register accordingly.
 *         Assumption:  For CCCR registers the enable bit is set to value 0.
 *         When current_group = 0, this is the first time this routine is called;
 *         initialize the locks and set up the EM tables.
 */
static VOID
silvermont_Write_PMU (
    VOID  *param
)
{
    U32            this_cpu = CONTROL_THIS_CPU();
    CPU_STATE      pcpu     = &pcb[this_cpu];

    if (CPU_STATE_current_group(pcpu) == 0) {
        if (EVENT_CONFIG_mode(global_ec) != EM_DISABLED) {
            U32            index;
            U32            st_index;
            U32            j;

            /* Save all the initialization values away into an array for Event Multiplexing. */
            for (j = 0; j < EVENT_CONFIG_num_groups(global_ec); j++) {
                CPU_STATE_current_group(pcpu) = j;
                st_index   = CPU_STATE_current_group(pcpu) * EVENT_CONFIG_max_gp_events(global_ec);
                FOR_EACH_DATA_GP_REG(pecb, i) {
                    index = st_index + (ECB_entries_reg_id(pecb,i) - IA32_FULL_PMC0);
                    CPU_STATE_em_tables(pcpu)[index] = ECB_entries_reg_value(pecb,i);
                } END_FOR_EACH_DATA_GP_REG;
            }
            /* Reset the current group to the very first one. */
            CPU_STATE_current_group(pcpu) = this_cpu % EVENT_CONFIG_num_groups(global_ec);
        }
/*!
 * @fn void cpumon_Init_Cpu(param)
 *
 * @param    param    unused parameter
 *
 * @return   None     No return needed
 *
 * @brief  Set up the interrupt handler.  
 *
 */
static VOID 
cpumon_Init_Cpu (
    PVOID parm
)
{
    unsigned long        eflags;
    U64                 *idt_base;
    CPU_STATE            pcpu;
    local_handler_t      lhandler;

    preempt_disable();
    pcpu = &pcb[CONTROL_THIS_CPU()];
    preempt_enable();
    SYS_Local_Irq_Save(eflags);
    
    idt_base = CPU_STATE_idt_base(pcpu);
    // install perf. handler
    // These are the necessary steps to have an ISR entry
    // Note the changes in the data written
    lhandler.u64[0] = (unsigned long)SYS_Perfvec_Handler;
    lhandler.u16[3] = lhandler.u16[1];
    lhandler.u16[1] = SYS_Get_cs();
    lhandler.u16[2] = 0xee00;
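    // Gate layout in the low 8 bytes: [15:0] offset_low, [31:16] CS selector,
    // [47:32] type/attributes, [63:48] offset_middle.  0xee00 marks the entry as
    // present, DPL 3, 64-bit interrupt gate (type 0xE).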

    idt_base[CPU_PERF_VECTOR] = lhandler.u64[0];
    SYS_Local_Irq_Restore(eflags);
    return;
}
/*!
 * @fn void cpumon_Init_Cpu(param)
 *
 * @param    param    unused parameter
 *
 * @return   None     No return needed
 *
 * @brief  Set up the interrupt handler.  
 *
 */
static VOID 
cpumon_Init_Cpu (
    PVOID parm
)
{
    unsigned long        eflags;
    CPU_STATE            pcpu = &pcb[CONTROL_THIS_CPU()];
    GATE_STRUCT         *idt;

    SYS_Local_Irq_Save(eflags);
    idt = CPU_STATE_idt_base(pcpu);
    cpumon_Set_IDT_Func(idt, SYS_Perfvec_Handler);
    SYS_Local_Irq_Restore(eflags);

    return;
}
/*!
 * @fn corei7_unc_Read_PMU_Data(param)
 *
 * @param    param    dummy parameter which is not used
 *
 * @return   None     No return needed
 *
 * @brief    Read all the data MSR's into a buffer.  Called by the interrupt handler.
 *
 */
static VOID
corei7_unc_Read_PMU_Data(
    PVOID   param
)
{
    S32       start_index, j;
    U64      *buffer    = read_counter_info;
    U32       this_cpu  = CONTROL_THIS_CPU();

    start_index = DRV_CONFIG_num_events(pcfg) * this_cpu;
    SEP_PRINT_DEBUG("PMU control_data 0x%p, buffer 0x%p, j = %d\n", PMU_register_data, buffer, j);
    FOR_EACH_DATA_REG(pecb_unc,i) {
        j = start_index + ECB_entries_event_id_index(pecb_unc,i);
        buffer[j] = SYS_Read_MSR(ECB_entries_reg_id(pecb_unc,i));
        SEP_PRINT_DEBUG("this_cpu %d, event_id %d, value 0x%llx\n", this_cpu, i, buffer[j]);
    } END_FOR_EACH_DATA_REG;

    return;
}
/*!
 * @fn unc_power_avt_Read_PMU_Data(param)
 *
 * @param    param    The read thread node to process
 *
 * @return   None     No return needed
 *
 * @brief    Read the Uncore count data and store into the buffer param;
 *           Uncore PMU does not support sampling, i.e. ignore the id parameter.
 */
static VOID
unc_power_avt_Read_PMU_Data (
    PVOID  param
)
{
    S32                   j;
    U64                  *buffer              = read_unc_ctr_info;
    U32                   dev_idx             = *((U32*)param);
    U32                   start_index;
    DRV_CONFIG            pcfg_unc;
    U32                   this_cpu            = CONTROL_THIS_CPU();
    CPU_STATE             pcpu                = &pcb[this_cpu];
    U32                   num_cpus            = GLOBAL_STATE_num_cpus(driver_state);
    U32                   cur_grp             = LWPMU_DEVICE_cur_group(&devices[(dev_idx)]);
    U32                   package_event_count = 0;
    U32                   thread_event_count  = 0;
    U32                   module_event_count  = 0;

    pcfg_unc    = (DRV_CONFIG)LWPMU_DEVICE_pcfg(&devices[dev_idx]);
    start_index = DRV_CONFIG_emon_unc_offset(pcfg_unc, cur_grp);

    FOR_EACH_DATA_REG_UNC(pecb, dev_idx, i) {
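        // Slot layout in the EMON buffer: the group's base offset, plus the slots already
        // consumed by earlier package-, module- and thread-scope events; the per-scope
        // index (package id, module id or cpu id) is added below.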
        j =   start_index + ECB_entries_group_index(pecb,i)  +
               package_event_count*num_packages +
               module_event_count*(GLOBAL_STATE_num_modules(driver_state)) +
               thread_event_count*num_cpus ;
        if (ECB_entries_event_scope(pecb,i) == PACKAGE_EVENT) {
            j = j + core_to_package_map[this_cpu];
            package_event_count++;
            if (!CPU_STATE_socket_master(pcpu)) {
                continue;
            }
        }
        else if (ECB_entries_event_scope(pecb,i) == MODULE_EVENT) {
            j = j + CPU_STATE_cpu_module_num(pcpu);
            module_event_count++;
            if (!CPU_STATE_cpu_module_master(pcpu)) {
                continue;
            }
        }
        else {
            j = j + this_cpu;
            thread_event_count++;
        }
        buffer[j] = SYS_Read_MSR(ECB_entries_reg_id(pecb,i));
        //SEP_PRINT_DEBUG("cpu=%d j=%d mec=%d mid=%d tec=%d i=%d gi=%d ei=%d count=%llu\n", this_cpu, j, module_event_count, CPU_STATE_cpu_module_num(pcpu), thread_event_count, i, ECB_entries_group_index(pecb,i), ECB_entries_emon_event_id_index_local(pecb,i), buffer[j]);
    } END_FOR_EACH_DATA_REG_UNC;

    return;
}
/*!
 * @fn void cpumon_Destroy_Cpu(param)
 *
 * @param    param    unused parameter
 *
 * @return   None     No return needed
 *
 * @brief  Restore the old handler
 * @brief  Finish clean up of the apic
 *
 */
static VOID 
cpumon_Destroy_Cpu (
    PVOID ctx
)
{
    unsigned long        eflags;
    CPU_STATE            pcpu = &pcb[CONTROL_THIS_CPU()];
    GATE_STRUCT         *idt;

    SYS_Local_Irq_Save(eflags);
    APIC_Disable_PMI();
    idt = CPU_STATE_idt_base(pcpu);
    cpumon_Set_IDT_Func(idt, CPU_STATE_saved_ih(pcpu));
    SYS_Local_Irq_Restore(eflags);

    return;
}
/*!
 * @fn void cpumon_Init_Cpu(param)
 *
 * @param    param    unused parameter
 *
 * @return   None     No return needed
 *
 * @brief  Set up the interrupt handler.  
 *
 */
static VOID 
cpumon_Init_Cpu (
    PVOID parm
)
{
    unsigned long        eflags;
    U64                 *idt_base;
    CPU_STATE            pcpu;
    local_handler_t      lhandler;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
    unsigned long        cr0_value;
#endif

    preempt_disable();
    pcpu = &pcb[CONTROL_THIS_CPU()];
    preempt_enable();
    SYS_Local_Irq_Save(eflags);
    
    idt_base = CPU_STATE_idt_base(pcpu);
    // install perf. handler
    // These are the necessary steps to have an ISR entry
    // Note the changes in the data written
    lhandler.u64[0] = (unsigned long)SYS_Perfvec_Handler;
    lhandler.u16[3] = lhandler.u16[1];
    lhandler.u16[1] = SYS_Get_cs();
    lhandler.u16[2] = 0xee00;
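    // 0xee00 marks the gate as present, DPL 3, 64-bit interrupt gate (type 0xE).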

    // From 3.10 kernel, the IDT memory has been moved to a read-only location
    // which is controlled by the bit 16 in the CR0 register.
    // The write protection should be temporarily released to update the IDT.
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
    cr0_value = read_cr0();
    write_cr0(cr0_value & ~X86_CR0_WP);
#endif
    idt_base[CPU_PERF_VECTOR] = lhandler.u64[0];
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
    write_cr0(cr0_value);
#endif

    SYS_Local_Irq_Restore(eflags);
    return;
}
/*!
 * @fn void cpumon_Save_Cpu(param)
 *
 * @param    param    unused parameter
 *
 * @return   None     No return needed
 *
 * @brief  Save the old handler for restoration when done
 *
 */
static void 
cpumon_Save_Cpu (
    PVOID parm
)
{
    unsigned long        eflags;
    U64                 *idt_base;
    CPU_STATE            pcpu;

    preempt_disable();
    pcpu = &pcb[CONTROL_THIS_CPU()];
    preempt_enable();

    SYS_Local_Irq_Save(eflags);
    CPU_STATE_idt_base(pcpu) = idt_base = SYS_Get_IDT_Base();
    // save original perf. vector
    CPU_STATE_saved_ih(pcpu) = idt_base[CPU_PERF_VECTOR];
    SEP_PRINT_DEBUG("saved_ih is 0x%llx\n", CPU_STATE_saved_ih(pcpu));
    SYS_Local_Irq_Restore(eflags);
    return;
}
/*!
 * @fn void cpumon_Destroy_Cpu(param)
 *
 * @param    param    unused parameter
 *
 * @return   None     No return needed
 *
 * @brief  Restore the old handler
 * @brief  Finish clean up of the apic
 *
 */
static VOID 
cpumon_Destroy_Cpu (
    PVOID ctx
)
{
    unsigned long        eflags;
    unsigned long long  *idt_base;
    CPU_STATE            pcpu;
    preempt_disable();
    pcpu = &pcb[CONTROL_THIS_CPU()];
    preempt_enable();

    SYS_Local_Irq_Save(eflags);
    // restore perf. vector (to a safe stub pointer)
    idt_base = SYS_Get_IDT_Base();
    APIC_Disable_PMI();
    idt_base[CPU_PERF_VECTOR] = CPU_STATE_saved_ih(pcpu);
    SYS_Local_Irq_Restore(eflags);

    return;
}
/*!
 * @fn void cpumon_Destroy_Cpu(param)
 *
 * @param    param    unused parameter
 *
 * @return   None     No return needed
 *
 * @brief  Restore the old handler
 * @brief  Finish clean up of the apic
 *
 */
static VOID 
cpumon_Destroy_Cpu (
    PVOID ctx
)
{
    unsigned long        eflags;
    unsigned long long  *idt_base;
    CPU_STATE            pcpu;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
    unsigned long        cr0_value;
#endif

    preempt_disable();
    pcpu = &pcb[CONTROL_THIS_CPU()];
    preempt_enable();

    SYS_Local_Irq_Save(eflags);
    // restore perf. vector (to a safe stub pointer)
    idt_base = SYS_Get_IDT_Base();
    APIC_Disable_PMI();

    // From 3.10 kernel, the IDT memory has been moved to a read-only location
    // which is controlled by the bit 16 in the CR0 register.
    // The write protection should be temporarily released to update the IDT.
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
    cr0_value = read_cr0();
    write_cr0(cr0_value & ~X86_CR0_WP);
#endif
    idt_base[CPU_PERF_VECTOR] = CPU_STATE_saved_ih(pcpu);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
    write_cr0(cr0_value);
#endif

    SYS_Local_Irq_Restore(eflags);

    return;
}
/*!
 * @fn          static U32 chap_Read_Counters(PVOID param)
 * 
 * @brief       Read the CHAP counter data
 *
 * @param       PVOID param - address of the buffer to write into
 * 
 * @return      None
 *
 * <I>Special Notes:</I>
 *             <NONE>
 */
static VOID
chap_Read_Counters (
    PVOID  param
)
{
    U64            *data;
    CHAP_INTERFACE  chap;
    U32             mch_cpu;
    int             i, data_index;
    U64             tmp_data;
    U64            *mch_data;
    U64            *ich_data;
    U64            *mmio_data;
    U64            *mmio;
    U32             this_cpu        = CONTROL_THIS_CPU();
    CHIPSET_SEGMENT mch_chipset_seg = &CHIPSET_CONFIG_mch(pma);
    CHIPSET_SEGMENT ich_chipset_seg = &CHIPSET_CONFIG_ich(pma);
    CHIPSET_SEGMENT noa_chipset_seg = &CHIPSET_CONFIG_noa(pma);

    data       = param;
    data_index = 0;

    // Save the motherboard time.  This is universal time for this system.
    // It is the only 64-bit timer, so it is saved first to keep it aligned
    // on a 64-bit boundary.

    if (CHIPSET_CONFIG_mch_chipset(pma)) {
        mch_data = data + data_index;
        // Save the MCH counters.
        chap = (CHAP_INTERFACE)(UIOP)CHIPSET_SEGMENT_virtual_address(mch_chipset_seg);
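        // Issue the Sample command to each MCH CHAP counter before the data
        // registers are read below.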
        for (i = CHIPSET_SEGMENT_start_register(mch_chipset_seg);
                        i < CHIPSET_SEGMENT_total_events(mch_chipset_seg); i++) {
            CHAP_INTERFACE_command_register(&chap[i]) = 0x00020000; // Sample
        }

        // The StartingReadRegister is only used for special event
        // configs that use CHAP counters to trigger events in other
        // CHAP counters.  This is an unusual request but useful in
        // getting the number of lit subspans - implying a count of the
        // number of triangles.  I am not sure it will be used
        // elsewhere.  We cannot read some of the counters because it
        // will invalidate their configuration to trigger other CHAP
        // counters.  Yuk!
        data_index += CHIPSET_SEGMENT_start_register(mch_chipset_seg);
        for (i = CHIPSET_SEGMENT_start_register(mch_chipset_seg);
                        i < CHIPSET_SEGMENT_total_events(mch_chipset_seg); i++) {
            data[data_index++] = CHAP_INTERFACE_data_register(&chap[i]);
        }

        // Initialize the counters on the first interrupt
        if (pcb[this_cpu].chipset_count_init == TRUE) {
            for (i = 0; i < CHIPSET_SEGMENT_total_events(mch_chipset_seg); i++) {
                pcb[this_cpu].last_mch_count[i] = mch_data[i];
            }
        }

        // Now compute the delta!
        // NOTE: Special modification to accommodate Gen 4 work - count
        // everything since last interrupt - regardless of cpu!  This
        // way there is only one count of the Gen 4 counters.
        //
        mch_cpu = CHIPSET_CONFIG_host_proc_run(pma) ? this_cpu : 0;
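        // The (U32)(-1) adjustment below assumes the counter wrapped around its
        // 32-bit range whenever the new reading is smaller than the previous one.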
        for (i = 0; i < CHIPSET_SEGMENT_total_events(mch_chipset_seg); i++) {
            tmp_data = mch_data[i];
            if (mch_data[i] < pcb[mch_cpu].last_mch_count[i]) {
                mch_data[i] = mch_data[i] + (U32)(-1) - pcb[mch_cpu].last_mch_count[i];
            }
            else {
                mch_data[i] = mch_data[i] - pcb[mch_cpu].last_mch_count[i];
            }
            pcb[mch_cpu].last_mch_count[i] = tmp_data;
        }
    }

    if (CHIPSET_CONFIG_ich_chipset(pma)) {
        // Save the ICH counters.
        ich_data = data + data_index;
        chap = (CHAP_INTERFACE)(UIOP)CHIPSET_SEGMENT_virtual_address(ich_chipset_seg);
        for (i = 0; i < CHIPSET_SEGMENT_total_events(ich_chipset_seg); i++) {
            CHAP_INTERFACE_command_register(&chap[i]) = 0x00020000; // Sample
        }

        for (i = 0; i < CHIPSET_SEGMENT_total_events(ich_chipset_seg); i++) {
            data[data_index++] = CHAP_INTERFACE_data_register(&chap[i]);
        }

        // Initialize the counters on the first interrupt
        if (pcb[this_cpu].chipset_count_init == TRUE) {
            for (i = 0; i < CHIPSET_SEGMENT_total_events(ich_chipset_seg); i++) {
                pcb[this_cpu].last_ich_count[i] = ich_data[i];
            }
        }

        // Now compute the delta!
        for (i = 0; i < CHIPSET_SEGMENT_total_events(ich_chipset_seg); i++) {
            tmp_data = ich_data[i];
            if (ich_data[i] < pcb[this_cpu].last_ich_count[i]) {
                ich_data[i] = ich_data[i] + (U32)(-1) - pcb[this_cpu].last_ich_count[i];
            }
            else {
                ich_data[i] = ich_data[i] - pcb[this_cpu].last_ich_count[i];
            }
            pcb[this_cpu].last_ich_count[i] = tmp_data;
        }
    }

    if (CHIPSET_CONFIG_noa_chipset(pma)) {
        // Save the MMIO counters.
        mmio_data = data + data_index;
        mmio      = (U64 *) (UIOP)CHIPSET_SEGMENT_virtual_address(noa_chipset_seg);

        for (i = 0; i < CHIPSET_SEGMENT_total_events(noa_chipset_seg); i++) {
            data[data_index++] = mmio[i*2 + 2244]; // 64-bit quantity
        }

        // Initialize the counters on the first interrupt
        if (pcb[this_cpu].chipset_count_init == TRUE) {
            for (i = 0; i < CHIPSET_SEGMENT_total_events(noa_chipset_seg); i++) {
                pcb[this_cpu].last_mmio_count[i] = mmio_data[i];
            }
        }

        // Now compute the delta!
        for (i = 0; i < CHIPSET_SEGMENT_total_events(noa_chipset_seg); i++) {
            tmp_data = mmio_data[i];
            if (mmio_data[i] < pcb[this_cpu].last_mmio_count[i]) {
                mmio_data[i] = mmio_data[i] + (U32)(-1) - pcb[this_cpu].last_mmio_count[i];
            }
            else {
                mmio_data[i] = mmio_data[i] - pcb[this_cpu].last_mmio_count[i];
            }
            pcb[this_cpu].last_mmio_count[i] = tmp_data;
        }
    }

    pcb[this_cpu].chipset_count_init = FALSE;

    FOR_EACH_DATA_REG(pecb,i) {
            data[data_index++] = SYS_Read_MSR(ECB_entries_reg_id(pecb,i));
            SYS_Write_MSR(ECB_entries_reg_id(pecb,i), (U64)0);
    } END_FOR_EACH_DATA_REG;

    return;
}
/*!
 * @fn          static VOID snbunc_imc_Write_PMU(VOID*)
 * 
 * @brief       Initial write of PMU registers.
 *              Walk through the entries and write the value of each register accordingly.
 *              When current_group = 0, this is the first time this routine is called.
 *
 * @param       param - pointer to the device index
 * 
 * @return      None
 *
 * <I>Special Notes:</I>
 */
static VOID
snbunc_imc_Write_PMU (
    VOID  *param
)
{
    
    DRV_PCI_DEVICE_ENTRY_NODE  dpden;
    U32                        pci_address;
    U32                        bar_lo;
    U64                        next_bar_offset;
    U64                        bar_hi;
    U64                        physical_address;
    U64                        final_bar;
    U32                        dev_idx   = *((U32*)param);
    ECB                        pecb      = LWPMU_DEVICE_PMU_register_data(&devices[(dev_idx)])[0];
    U32                        j;
    U32                        event_id   = 0;
    U32                        offset_delta;
    U32                        tmp_value;
    int                        me      = CONTROL_THIS_CPU();
    
    if (me != invoking_processor_id) {
        return;
    }

    SEP_PRINT_DEBUG("snbunc_imc_Write_PMU Enter\n");
    dpden = ECB_pcidev_entry_node(pecb);
    pci_address = FORM_PCI_ADDR(DRV_PCI_DEVICE_ENTRY_bus_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_dev_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_func_no(&dpden),
                                0);

#if defined(MYDEBUG)
    {
    U32 device_id  = PCI_Read_Ulong(pci_address);
    SEP_PRINT("Bus no = 0x%x\n",DRV_PCI_DEVICE_ENTRY_bus_no(&dpden));
    SEP_PRINT("Dev no = 0x%x\n",DRV_PCI_DEVICE_ENTRY_dev_no(&dpden));
    SEP_PRINT("Func no = 0x%x\n",DRV_PCI_DEVICE_ENTRY_func_no(&dpden));
    SEP_PRINT("value for device id = 0x%x\n",device_id);
    }
#endif

    pci_address = FORM_PCI_ADDR(DRV_PCI_DEVICE_ENTRY_bus_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_dev_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_func_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_bar_offset(&dpden));
    bar_lo      = PCI_Read_Ulong(pci_address);
        
    next_bar_offset     = DRV_PCI_DEVICE_ENTRY_bar_offset(&dpden) + NEXT_ADDR_OFFSET;
    pci_address         = FORM_PCI_ADDR(DRV_PCI_DEVICE_ENTRY_bus_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_dev_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_func_no(&dpden),
                                next_bar_offset);
    bar_hi              = PCI_Read_Ulong(pci_address);
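    // Reassemble the 64-bit BAR from its two 32-bit halves, then mask with
    // SNBUNC_IMC_BAR_ADDR_MASK to keep only the address bits of the MMIO base.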
    final_bar = (bar_hi << SNBUNC_IMC_BAR_ADDR_SHIFT) | bar_lo;
    final_bar &= SNBUNC_IMC_BAR_ADDR_MASK;

    DRV_PCI_DEVICE_ENTRY_bar_address(&ECB_pcidev_entry_node(pecb)) = final_bar;
    physical_address     = DRV_PCI_DEVICE_ENTRY_bar_address(&ECB_pcidev_entry_node(pecb))
                                 + DRV_PCI_DEVICE_ENTRY_base_offset_for_mmio(&ECB_pcidev_entry_node(pecb));
    virtual_address      = ioremap_nocache(physical_address,4096); 
    //Read in the counts into temporary buffer
    FOR_EACH_PCI_DATA_REG(pecb,i,dev_idx,offset_delta) {
            event_id                            = ECB_entries_event_id_index_local(pecb,i);
            tmp_value                           = readl((U32*)((char*)(virtual_address) + offset_delta));
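            // Seed every CPU's previous-value table with the current reading so the
            // first delta computation starts from this baseline; the +1 skips the slot
            // reserved for the group id, per the existing note.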
            for ( j = 0; j < (U32)GLOBAL_STATE_num_cpus(driver_state) ; j++) {
                   LWPMU_DEVICE_prev_val_per_thread(&devices[dev_idx])[j][event_id + 1] = tmp_value; // need to account for group id
#if defined(MYDEBUG)
                   SEP_PRINT_DEBUG("initial value for i =%d is 0x%x\n",i,LWPMU_DEVICE_prev_val_per_thread(&devices[dev_idx])[j][i]);

#endif
            }

            // this is needed for overflow detection of the accumulators.
            if (LWPMU_DEVICE_counter_mask(&devices[dev_idx]) == 0) {
                LWPMU_DEVICE_counter_mask(&devices[dev_idx]) = (U64)ECB_entries_max_bits(pecb,i);
            }
    } END_FOR_EACH_PCI_DATA_REG;

    return;
}