/*
 * @fn OS_STATUS OUTPUT_Flush()
 *
 * @brief Flush the module buffer and all per-CPU sample buffers to the readers.
 *
 * @return 0 on success; OS_RESTART_SYSCALL if the wait was interrupted by a signal.
 *
 * For each CPU that participated in the collection, mark the current output
 * buffer "full" up to the bytes actually written so the reader side drains it,
 * wake the readers, then block until every writer has signalled completion.
 * The module buffer is flushed the same way.
 */
extern int
OUTPUT_Flush (
    VOID
)
{
    int     i;
    int     writers = 0;   // number of per-CPU buffers that need draining
    OUTPUT  outbuf;

    /*
     * Flush all remaining data to files:
     * set up the flush event the writers will signal against.
     */
    init_waitqueue_head(&flush_queue);
    SEP_PRINT_DEBUG("flush: waiting for %d writers\n",(GLOBAL_STATE_num_cpus(driver_state)+ OTHER_C_DEVICES));
    /* First pass: count active writers and mark each CPU's current buffer
     * full up to the byte count actually used. */
    for (i = 0; i < GLOBAL_STATE_num_cpus(driver_state); i++) {
        if (CPU_STATE_initial_mask(&pcb[i]) == 0) {
            /* CPU never took part in the collection; nothing to flush. */
            continue;
        }
        outbuf = &(cpu_buf[i].outbuf);
        writers += 1;
        OUTPUT_buffer_full(outbuf,OUTPUT_current_buffer(outbuf)) =
            OUTPUT_total_buffer_size(outbuf) - OUTPUT_remaining_buffer_size(outbuf);
    }
    /* Publish how many writers (CPUs plus non-CPU devices) must check in
     * before the wait below may complete. */
    atomic_set(&flush_writers, writers + OTHER_C_DEVICES);
    // Flip the switch to terminate the output threads.
    // Do not do this earlier, as threads may terminate before all the data is flushed.
    flush = 1;
    /* Second pass: recompute the full mark (NOTE(review): this repeats the
     * first pass's assignment — presumably to pick up bytes written between
     * the two passes; confirm) and wake each per-CPU reader queue. */
    for (i = 0; i < GLOBAL_STATE_num_cpus(driver_state); i++) {
        if (CPU_STATE_initial_mask(&pcb[i]) == 0) {
            continue;
        }
        outbuf = &BUFFER_DESC_outbuf(&cpu_buf[i]);
        OUTPUT_buffer_full(outbuf,OUTPUT_current_buffer(outbuf)) =
            OUTPUT_total_buffer_size(outbuf) - OUTPUT_remaining_buffer_size(outbuf);
        wake_up_interruptible_sync(&BUFFER_DESC_queue(&cpu_buf[i]));
    }

    // Flush all data from the module buffer as well.
    outbuf = &BUFFER_DESC_outbuf(module_buf);
    OUTPUT_buffer_full(outbuf,OUTPUT_current_buffer(outbuf)) =
        OUTPUT_total_buffer_size(outbuf) - OUTPUT_remaining_buffer_size(outbuf);
    SEP_PRINT_DEBUG("OUTPUT_Flush - waking up module_queue\n");
    wake_up_interruptible_sync(&BUFFER_DESC_queue(module_buf));

    // Wait for the buffers to empty (each writer decrements flush_writers).
    if (wait_event_interruptible(flush_queue, atomic_read(&flush_writers)==0)) {
        /* Interrupted by a signal before all writers drained. */
        return OS_RESTART_SYSCALL;
    }
    SEP_PRINT_DEBUG("OUTPUT_Flush - awakened from flush_queue\n");
    flush = 0;

    return 0;
}
/*!
 * @fn unc_power_hsw_Read_PMU_Data(param)
 *
 * @param param Pointer to the device index (U32) of the uncore device to read
 *
 * @return None
 *
 * @brief Read the uncore power MSR counts on this CPU and store them into the
 *        shared read_unc_ctr_info buffer at per-event computed offsets.
 *        Uncore PMUs do not support sampling, so there is no event id parameter.
 */
static VOID
unc_power_hsw_Read_PMU_Data (
    PVOID param
)
{
    S32         j;
    U64        *buffer   = read_unc_ctr_info;   // global destination buffer
    U32         dev_idx  = *((U32*)param);
    U32         start_index;
    DRV_CONFIG  pcfg_unc;
    U32         this_cpu = CONTROL_THIS_CPU();
    CPU_STATE   pcpu     = &pcb[this_cpu];
    U32         num_cpus = GLOBAL_STATE_num_cpus(driver_state);
    U32         thread_event_count = 0;         // count of thread-scope events seen so far

    pcfg_unc = (DRV_CONFIG)LWPMU_DEVICE_pcfg(&devices[dev_idx]);
    /* NOTE(review): group argument is hard-coded to 0 here, while the avt
     * variant uses the device's current group — confirm this device only
     * ever has a single group. */
    start_index = DRV_CONFIG_emon_unc_offset(pcfg_unc, 0);
    FOR_EACH_DATA_REG_UNC(pecb, dev_idx, i) {
        if (ECB_entries_event_scope(pecb,i) == PACKAGE_EVENT) {
            /* Package-scope event: one slot per package; only the socket
             * master CPU records it. */
            j = start_index + thread_event_count*(num_cpus-1) +
                ECB_entries_group_index(pecb,i) +
                ECB_entries_emon_event_id_index_local(pecb,i);
            if (!CPU_STATE_socket_master(pcpu)) {
                continue;
            }
        }
        else {
            /* Thread-scope event: one slot per CPU, offset by this CPU's id. */
            j = start_index + this_cpu + thread_event_count*(num_cpus-1) +
                ECB_entries_group_index(pecb,i) +
                ECB_entries_emon_event_id_index_local(pecb,i);
            thread_event_count++;
        }
        buffer[j] = SYS_Read_MSR(ECB_entries_reg_id(pecb,i));
    } END_FOR_EACH_DATA_REG_UNC;

    return;
}
/* * @fn extern void OUTPUT_Initialize(buffer, len) * * @param buffer - seed name of the output file * @param len - length of the seed name * @returns None * @brief Allocate, initialize, and return all output data structure * * <I>Special Notes:</I> * Initialize the output structures. * For each CPU in the system, allocate the output buffers. * Initialize a module buffer and temp file to hold module information * Initialize the read queues for each sample buffer * */ extern OS_STATUS OUTPUT_Initialize ( char *buffer, unsigned long len ) { BUFFER_DESC unused; int i; OS_STATUS status = OS_SUCCESS; flush = 0; for (i = 0; i < GLOBAL_STATE_num_cpus(driver_state); i++) { unused = output_Initialized_Buffers(&cpu_buf[i], 1); if (!unused) { SEP_PRINT_ERROR("OUTPUT_Initialize: Failed to allocate cpu output buffers\n"); OUTPUT_Destroy(); return OS_NO_MEM; } } /* * Just need one module buffer */ module_buf = output_Initialized_Buffers(module_buf, MODULE_BUFF_SIZE); if (!module_buf) { SEP_PRINT_ERROR("OUTPUT_Initialize: Failed to create module output buffers\n"); OUTPUT_Destroy(); return OS_NO_MEM; } return status; }
/*! * @fn static U32 chap_Init_Chipset(void) * * @brief Chipset PMU initialization * * @param None * * @return VT_SUCCESS if successful, otherwise error * * <I>Special Notes:</I> * <NONE> */ static U32 chap_Init_Chipset ( VOID ) { U32 i; CHIPSET_SEGMENT mch_chipset_seg = &CHIPSET_CONFIG_mch(pma); CHIPSET_SEGMENT ich_chipset_seg = &CHIPSET_CONFIG_ich(pma); CHIPSET_SEGMENT noa_chipset_seg = &CHIPSET_CONFIG_noa(pma); SEP_PRINT_DEBUG("Initializing chipset ...\n"); if (DRV_CONFIG_enable_chipset(pcfg)) { for (i=0; i < GLOBAL_STATE_num_cpus(driver_state); i++) { pcb[i].chipset_count_init = TRUE; } if (CHIPSET_CONFIG_mch_chipset(pma)) { if (CHIPSET_SEGMENT_virtual_address(mch_chipset_seg) == 0) { // Map the virtual address of the PCI CHAP interface. CHIPSET_SEGMENT_virtual_address(mch_chipset_seg) = (U64) (UIOP) ioremap_nocache( CHIPSET_SEGMENT_physical_address(mch_chipset_seg), CHIPSET_SEGMENT_size(mch_chipset_seg)); } } if (CHIPSET_CONFIG_ich_chipset(pma)) { if (CHIPSET_SEGMENT_virtual_address(ich_chipset_seg) == 0) { // Map the virtual address of the PCI CHAP interface. CHIPSET_SEGMENT_virtual_address(ich_chipset_seg) = (U64) (UIOP) ioremap_nocache( CHIPSET_SEGMENT_physical_address(ich_chipset_seg), CHIPSET_SEGMENT_size(ich_chipset_seg)); } } // Here we map the MMIO registers for the Gen X processors. if (CHIPSET_CONFIG_noa_chipset(pma)) { if (CHIPSET_SEGMENT_virtual_address(noa_chipset_seg) == 0) { // Map the virtual address of the PCI CHAP interface. CHIPSET_SEGMENT_virtual_address(noa_chipset_seg) = (U64) (UIOP) ioremap_nocache( CHIPSET_SEGMENT_physical_address(noa_chipset_seg), CHIPSET_SEGMENT_size(noa_chipset_seg)); } } // // always collect processor events // CHIPSET_CONFIG_processor(pma) = 1; } else { CHIPSET_CONFIG_processor(pma) = 0; } SEP_PRINT_DEBUG("Initializing chipset done.\n"); return VT_SUCCESS; }
/*!
 * @fn unc_power_avt_Read_PMU_Data(param)
 *
 * @param param Pointer to the device index (U32) of the uncore device to read
 *
 * @return None
 *
 * @brief Read the uncore power MSR counts on this CPU and store them into the
 *        shared read_unc_ctr_info buffer. Events are laid out by scope:
 *        package events get one slot per package, module events one per
 *        module, and thread events one per CPU.
 *        Uncore PMUs do not support sampling, so there is no event id parameter.
 */
static VOID
unc_power_avt_Read_PMU_Data (
    PVOID param
)
{
    S32         j;
    U64        *buffer   = read_unc_ctr_info;   // global destination buffer
    U32         dev_idx  = *((U32*)param);
    U32         start_index;
    DRV_CONFIG  pcfg_unc;
    U32         this_cpu = CONTROL_THIS_CPU();
    CPU_STATE   pcpu     = &pcb[this_cpu];
    U32         num_cpus = GLOBAL_STATE_num_cpus(driver_state);
    U32         cur_grp  = LWPMU_DEVICE_cur_group(&devices[(dev_idx)]);
    U32         package_event_count = 0;        // package-scope events seen so far
    U32         thread_event_count  = 0;        // thread-scope events seen so far
    U32         module_event_count  = 0;        // module-scope events seen so far

    pcfg_unc    = (DRV_CONFIG)LWPMU_DEVICE_pcfg(&devices[dev_idx]);
    /* Offset of the current group's region within the shared buffer. */
    start_index = DRV_CONFIG_emon_unc_offset(pcfg_unc, cur_grp);
    FOR_EACH_DATA_REG_UNC(pecb, dev_idx, i) {
        /* Base slot for this event: skip past the regions consumed by the
         * previously-seen events of each scope. */
        j = start_index + ECB_entries_group_index(pecb,i) +
            package_event_count*num_packages +
            module_event_count*(GLOBAL_STATE_num_modules(driver_state)) +
            thread_event_count*num_cpus ;
        if (ECB_entries_event_scope(pecb,i) == PACKAGE_EVENT) {
            /* One slot per package; only the socket master records it. */
            j = j + core_to_package_map[this_cpu];
            package_event_count++;
            if (!CPU_STATE_socket_master(pcpu)) {
                continue;
            }
        }
        else if (ECB_entries_event_scope(pecb,i) == MODULE_EVENT) {
            /* One slot per module; only the module master records it. */
            j = j + CPU_STATE_cpu_module_num(pcpu);
            module_event_count++;
            if (!CPU_STATE_cpu_module_master(pcpu)) {
                continue;
            }
        }
        else {
            /* Thread-scope: one slot per CPU. */
            j = j + this_cpu;
            thread_event_count++;
        }
        buffer[j] = SYS_Read_MSR(ECB_entries_reg_id(pecb,i));
        //SEP_PRINT_DEBUG("cpu=%d j=%d mec=%d mid=%d tec=%d i=%d gi=%d ei=%d count=%llu\n", this_cpu, j, module_event_count, CPU_STATE_cpu_module_num(pcpu), thread_event_count, i, ECB_entries_group_index(pecb,i), ECB_entries_emon_event_id_index_local(pecb,i), buffer[j]);
    } END_FOR_EACH_DATA_REG_UNC;
/* * @fn extern void OUTPUT_Destroy() * * @param buffer - seed name of the output file * @param len - length of the seed name * @returns OS_STATUS * @brief Deallocate output structures * * <I>Special Notes:</I> * Free the module buffers * For each CPU in the system, free the sampling buffers */ extern int OUTPUT_Destroy ( VOID ) { int i, n; OUTPUT outbuf = &BUFFER_DESC_outbuf(module_buf); output_Free_Buffers(module_buf, OUTPUT_total_buffer_size(outbuf)); if (cpu_buf != NULL) { n = GLOBAL_STATE_num_cpus(driver_state); for (i = 0; i < n; i++) { outbuf = &BUFFER_DESC_outbuf(&cpu_buf[i]); output_Free_Buffers(&cpu_buf[i], OUTPUT_total_buffer_size(outbuf)); } } return 0; }
/*! * @fn extern void CPUMON_Remove_Cpuhools(void) * * @param None * * @return None No return needed * * @brief De-Initialize the APIC in phases * @brief clean up the interrupt handler (on a per-processor basis) * */ extern VOID CPUMON_Remove_Cpuhooks ( void ) { int i; unsigned long eflags; SYS_Local_Irq_Save(eflags); cpumon_Destroy_Cpu((PVOID)(size_t)0); SYS_Local_Irq_Restore(eflags); CONTROL_Invoke_Parallel_XS(cpumon_Destroy_Cpu, (PVOID)(size_t)0); // de-initialize APIC APIC_Unmap(CPU_STATE_apic_linear_addr(&pcb[0])); for (i = 0; i < GLOBAL_STATE_num_cpus(driver_state); i++) { APIC_Deinit_Phase1(i); } return; }
/******************************************************************************************
 * @fn static VOID unc_power_snb_Write_PMU(VOID*)
 *
 * @brief No registers to write; seed the accumulators with the current MSR
 *        values so later reads can compute deltas.
 *
 * @param param Pointer to the device index (U32) of the uncore device
 *
 * @return None
 *
 * <I>Special Notes:</I>
 *     NOTE(review): event_id is never advanced, so every entry writes slot
 *     [1] of prev_val_per_thread — confirm this device exposes only a single
 *     event per group. The MSR is also re-read for each CPU index j, so each
 *     thread's baseline is a slightly different snapshot — presumably
 *     acceptable for a free-running power counter; verify.
 ******************************************************************************************/
static VOID
unc_power_snb_Write_PMU (
    VOID *param
)
{
    U32  dev_idx   = *((U32*)param);
    U64  tmp_value = 0;
    U32  j;
    U32  event_id  = 0;

    FOR_EACH_REG_ENTRY_UNC(pecb, dev_idx, i) {
        /* Capture the current counter value as the baseline for every CPU. */
        for ( j = 0; j < (U32)GLOBAL_STATE_num_cpus(driver_state); j++) {
            tmp_value = SYS_Read_MSR(ECB_entries_reg_id(pecb,i)) & SNB_POWER_MSR_DATA_MASK;
            LWPMU_DEVICE_prev_val_per_thread(&devices[dev_idx])[j][event_id + 1] = tmp_value;   // need to account for group id
        }
        // Initialize counter_mask for accumulators (used for overflow detection).
        if (LWPMU_DEVICE_counter_mask(&devices[dev_idx]) == 0) {
            LWPMU_DEVICE_counter_mask(&devices[dev_idx]) = (U64)ECB_entries_max_bits(pecb,i);
        }
    } END_FOR_EACH_REG_ENTRY_UNC;

    return;
}
/*!
 * @fn static VOID snbunc_imc_Write_PMU(VOID*)
 *
 * @brief Initial write of PMU registers:
 *        decode the IMC's 64-bit BAR from PCI config space, map its MMIO
 *        region, and seed the per-thread accumulators with the current
 *        counter values so later reads can compute deltas.
 *        When current_group = 0, this is the first time this routine is called.
 *
 * @param param Pointer to the device index (U32) of the uncore device
 *
 * @return None
 *
 * <I>Special Notes:</I>
 *     Only runs on the invoking processor; all other CPUs return immediately.
 *     NOTE(review): the ioremap_nocache() result is not checked for NULL
 *     before readl() — confirm mapping failure cannot occur here.
 */
static VOID
snbunc_imc_Write_PMU (
    VOID *param
)
{
    DRV_PCI_DEVICE_ENTRY_NODE  dpden;
    U32                        pci_address;
    U32                        bar_lo;
    U64                        next_bar_offset;
    U64                        bar_hi;
    U64                        physical_address;
    U64                        final_bar;
    U32                        dev_idx  = *((U32*)param);
    ECB                        pecb     = LWPMU_DEVICE_PMU_register_data(&devices[(dev_idx)])[0];
    U32                        j;
    U32                        event_id = 0;
    U32                        offset_delta;
    U32                        tmp_value;
    int                        me       = CONTROL_THIS_CPU();

    /* Only one CPU performs the PCI/MMIO setup. */
    if (me != invoking_processor_id) {
        return;
    }
    SEP_PRINT_DEBUG("snbunc_imc_Write_PMU Enter\n");
    dpden = ECB_pcidev_entry_node(pecb);
    pci_address = FORM_PCI_ADDR(DRV_PCI_DEVICE_ENTRY_bus_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_dev_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_func_no(&dpden),
                                0);
#if defined(MYDEBUG)
    {
        U32 device_id = PCI_Read_Ulong(pci_address);
        SEP_PRINT("Bus no = 0x%x\n",DRV_PCI_DEVICE_ENTRY_bus_no(&dpden));
        SEP_PRINT("Dev no = 0x%x\n",DRV_PCI_DEVICE_ENTRY_dev_no(&dpden));
        SEP_PRINT("Func no = 0x%x\n",DRV_PCI_DEVICE_ENTRY_func_no(&dpden));
        SEP_PRINT("value for device id = 0x%x\n",device_id);
    }
#endif
    /* Read the low dword of the BAR ... */
    pci_address = FORM_PCI_ADDR(DRV_PCI_DEVICE_ENTRY_bus_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_dev_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_func_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_bar_offset(&dpden));
    bar_lo = PCI_Read_Ulong(pci_address);
    /* ... then the high dword from the next config-space register. */
    next_bar_offset = DRV_PCI_DEVICE_ENTRY_bar_offset(&dpden) + NEXT_ADDR_OFFSET;
    pci_address = FORM_PCI_ADDR(DRV_PCI_DEVICE_ENTRY_bus_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_dev_no(&dpden),
                                DRV_PCI_DEVICE_ENTRY_func_no(&dpden),
                                next_bar_offset);
    bar_hi = PCI_Read_Ulong(pci_address);
    /* Combine into the full 64-bit BAR and mask off the non-address bits. */
    final_bar = (bar_hi << SNBUNC_IMC_BAR_ADDR_SHIFT) | bar_lo;
    final_bar &= SNBUNC_IMC_BAR_ADDR_MASK;
    DRV_PCI_DEVICE_ENTRY_bar_address(&ECB_pcidev_entry_node(pecb)) = final_bar;
    physical_address = DRV_PCI_DEVICE_ENTRY_bar_address(&ECB_pcidev_entry_node(pecb))
                       + DRV_PCI_DEVICE_ENTRY_base_offset_for_mmio(&ECB_pcidev_entry_node(pecb));
    /* Map one page of the counter MMIO region.
     * NOTE(review): virtual_address is presumably a file-scope global — verify. */
    virtual_address = ioremap_nocache(physical_address,4096);

    // Read in the counts into a temporary buffer and seed the accumulators.
    FOR_EACH_PCI_DATA_REG(pecb,i,dev_idx,offset_delta) {
        event_id = ECB_entries_event_id_index_local(pecb,i);
        tmp_value = readl((U32*)((char*)(virtual_address) + offset_delta));
        for ( j = 0; j < (U32)GLOBAL_STATE_num_cpus(driver_state) ; j++) {
            LWPMU_DEVICE_prev_val_per_thread(&devices[dev_idx])[j][event_id + 1] = tmp_value;   // need to account for group id
#if defined(MYDEBUG)
            SEP_PRINT_DEBUG("initial value for i =%d is 0x%x\n",i,LWPMU_DEVICE_prev_val_per_thread(&devices[dev_idx])[j][i]);
#endif
        }
        // This is needed for overflow detection of the accumulators.
        if (LWPMU_DEVICE_counter_mask(&devices[dev_idx]) == 0) {
            LWPMU_DEVICE_counter_mask(&devices[dev_idx]) = (U64)ECB_entries_max_bits(pecb,i);
        }
    } END_FOR_EACH_PCI_DATA_REG;