static void cpu_startup(void *dummy) { /* * Initialise the decrementer-based clock. */ decr_init(); /* * Good {morning,afternoon,evening,night}. */ cpu_setup(PCPU_GET(cpuid)); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)physmem), ptoa((uintmax_t)physmem) / 1048576); realmem = physmem; if (bootverbose) printf("available KVA = %zu (%zu MB)\n", virtual_end - virtual_avail, (virtual_end - virtual_avail) / 1048576); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size1 = phys_avail[indx + 1] - phys_avail[indx]; #ifdef __powerpc64__ printf("0x%016jx - 0x%016jx, %jd bytes (%jd pages)\n", #else printf("0x%09jx - 0x%09jx, %ju bytes (%ju pages)\n", #endif (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size1, (uintmax_t)size1 / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_cnt.v_free_count), ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); }
int test_perfmonstart() { CpuInfo_t cpuinfo; int group1, group2; int cpu = 0; topology_init(); cpuinfo = get_cpuInfo(); if (cpuinfo->isIntel == 0) { topology_finalize(); return 1; } int ret = perfmon_init(1, &cpu); if (ret != 0) goto fail; ret = perfmon_addEventSet(eventset_ok); if (ret != 0) goto fail; group1 = ret; ret = perfmon_setupCounters(group1); if (ret != 0) goto fail; if (perfmon_getIdOfActiveGroup() != group1) goto fail; ret = perfmon_startCounters(); if (ret != 0) goto fail; perfmon_finalize(); topology_finalize(); return 1; fail: perfmon_finalize(); topology_finalize(); return 0; }
static void opd_26_init(void) { size_t i; size_t opd_buf_size; opd_create_vmlinux(vmlinux, kernel_range); opd_buf_size = opd_read_fs_int("/dev/oprofile/", "buffer_size", 1); kernel_pointer_size = opd_read_fs_int("/dev/oprofile/", "pointer_size", 1); s_buf_bytesize = opd_buf_size * kernel_pointer_size; sbuf = xmalloc(s_buf_bytesize); opd_reread_module_info(); for (i = 0; i < OPD_MAX_STATS; i++) opd_stats[i] = 0; perfmon_init(); cookie_init(); sfile_init(); anon_init(); /* must be /after/ perfmon_init() at least */ if (atexit(clean_exit)) { perfmon_exit(); perror("oprofiled: couldn't set exit cleanup: "); exit(EXIT_FAILURE); } }
int test_perfmonstop_noadd() { CpuInfo_t cpuinfo; int cpu = 0; int group; topology_init(); cpuinfo = get_cpuInfo(); if (cpuinfo->isIntel == 0) { topology_finalize(); return 1; } int ret = perfmon_init(1, &cpu); if (ret != 0) goto fail; ret = perfmon_stopCounters(); if (ret == 0) goto fail; perfmon_finalize(); topology_finalize(); return 1; fail: perfmon_finalize(); topology_finalize(); return 0; }
int test_perfmonstop_nostart() { CpuInfo_t cpuinfo; int cpu = 0; int group; topology_init(); cpuinfo = get_cpuInfo(); if (cpuinfo->isIntel == 0) { topology_finalize(); return 1; } int ret = perfmon_init(1, &cpu); if (ret != 0) goto fail; ret = perfmon_addEventSet(eventset_ok); if (ret != 0) goto fail; group = ret; ret = perfmon_setupCounters(group); if (ret != 0) goto fail; ret = perfmon_stopCounters(); if (ret == 0) goto fail; perfmon_finalize(); topology_finalize(); return 1; fail: perfmon_finalize(); topology_finalize(); return 0; }
int test_perfmonresult_noadd() { CpuInfo_t cpuinfo; int cpu = 0; int group; topology_init(); cpuinfo = get_cpuInfo(); if (cpuinfo->isIntel == 0) { topology_finalize(); return 1; } int ret = perfmon_init(1, &cpu); if (ret != 0) goto fail; double result = perfmon_getResult(0,0,0); if (result != 0) goto fail; perfmon_finalize(); topology_finalize(); return 1; fail: perfmon_finalize(); topology_finalize(); return 0; }
int main(int argc, char* argv[]) { int i; int* cpus; int gid; double result = 0.0; // Load the topology module and print some values. topology_init(); // CpuInfo_t contains global information like name, CPU family, ... CpuInfo_t info = get_cpuInfo(); // CpuTopology_t contains information about the topology of the CPUs. CpuTopology_t topo = get_cpuTopology(); printf("Likwid example on a %s with %d CPUs\n", info->name, topo->numHWThreads); cpus = malloc(topo->numHWThreads * sizeof(int)); if (!cpus) return 1; for (i=0;i<topo->numHWThreads;i++) { cpus[i] = topo->threadPool[i].apicId; } // Must be called before perfmon_init() but only if you want to use another // access mode as the pre-configured one. For direct access (0) you have to // be root. //accessClient_setaccessmode(0); // Initialize the perfmon module. perfmon_init(topo->numHWThreads, cpus); // Add eventset string to the perfmon module. gid = perfmon_addEventSet(EVENTSET); // Setup the eventset identified by group ID (gid). perfmon_setupCounters(gid); // Start all counters in the previously set up event set. perfmon_startCounters(); // Perform something sleep(2); // Stop all counters in the previously started event set. perfmon_stopCounters(); // Print the result of every thread/CPU. for (i = 0;i < topo->numHWThreads; i++) { result = perfmon_getResult(gid, 0, i); printf("Measurement result for event set %s at CPU %d: %f\n", EVENTSET, cpus[i], result); } // Uninitialize the perfmon module. perfmon_finalize(); // Uninitialize the topology module. topology_finalize(); return 0; }
static int lua_likwid_init(lua_State* L) { int ret; int nrThreads = luaL_checknumber(L,1); luaL_argcheck(L, nrThreads > 0, 1, "CPU count must be greater than 0"); int cpus[nrThreads]; if (!lua_istable(L, -1)) { lua_pushstring(L,"No table given as second argument"); lua_error(L); } for (ret = 1; ret<=nrThreads; ret++) { lua_rawgeti(L,-1,ret); cpus[ret-1] = lua_tounsigned(L,-1); lua_pop(L,1); } if (topology_isInitialized == 0) { topology_init(); topology_isInitialized = 1; cpuinfo = get_cpuInfo(); cputopo = get_cpuTopology(); } if ((topology_isInitialized) && (cpuinfo == NULL)) { cpuinfo = get_cpuInfo(); } if ((topology_isInitialized) && (cputopo == NULL)) { cputopo = get_cpuTopology(); } if (numa_isInitialized == 0) { numa_init(); numa_isInitialized = 1; numainfo = get_numaTopology(); } if ((numa_isInitialized) && (numainfo == NULL)) { numainfo = get_numaTopology(); } if (perfmon_isInitialized == 0) { ret = perfmon_init(nrThreads, &(cpus[0])); if (ret != 0) { lua_pushstring(L,"Cannot initialize likwid perfmon"); lua_error(L); return 1; } perfmon_isInitialized = 1; timer_isInitialized = 1; lua_pushinteger(L,ret); } return 1; }
void arch_init() { pci_init(); #ifdef __CONFIG_ENABLE_MPTABLES__ mptables_parse(); ioapic_init(); // MUST BE AFTER PCI/ISA INIT! // TODO: move these back to regular init. requires fixing the // __CONFIG_NETWORKING__ inits to not need multiple cores running. #endif // this returns when all other cores are done and ready to receive IPIs #ifdef __CONFIG_SINGLE_CORE__ smp_percpu_init(); #else smp_boot(); #endif proc_init(); /* EXPERIMENTAL NETWORK FUNCTIONALITY * To enable, define __CONFIG_NETWORKING__ in your Makelocal * If enabled, will load the rl8168 driver (if device exists) * and will a boot into userland matrix, so remote syscalls can be performed. * If in simulation, will do some debugging information with the ne2k device * * Note: If you use this, you should also define the mac address of the * teathered machine via USER_MAC_ADDRESS in Makelocal. * * Additionally, you should have a look at the syscall server in the tools directory */ #ifdef __CONFIG_NETWORKING__ #ifdef __CONFIG_SINGLE_CORE__ warn("You currently can't have networking if you boot into single core mode!!\n"); #else rl8168_init(); ne2k_init(); e1000_init(); #endif // __CONFIG_SINGLE_CORE__ #endif // __CONFIG_NETWORKING__ perfmon_init(); #ifdef __CONFIG_MONITOR_ON_INT__ /* Handler to read a char from the interrupt source and call the monitor. * Need to read the character so the device will send another interrupt. * Note this will read from both the serial and the keyboard, and throw away * the result. We condition, since we don't want to trigger on a keyboard * up interrupt */ void mon_int(struct trapframe *tf, void *data) { // Enable interrupts here so that we can receive // other interrupts (e.g. from the NIC) enable_irq(); if (cons_getc()) monitor(0); }
int __init oprofile_arch_init(struct oprofile_operations *ops) { int ret = -ENODEV; #ifdef CONFIG_PERFMON ret = perfmon_init(ops); #endif ops->backtrace = ia64_backtrace; return ret; }
int test_perfmoninit_faulty() { int cpu = 0; int ret = perfmon_init(1, &cpu); if (ret != 0) goto fail; perfmon_finalize(); return 0; fail: perfmon_finalize(); return 1; }
int test_perfmoninit() { int cpu = 0; int i; topology_init(); affinity_init(); for(i=0;i<10;i++) { perfmon_init(1, &cpu); perfmon_finalize(); } affinity_finalize(); topology_finalize(); return 1; }
static void opd_26_init(void) { size_t i; size_t opd_buf_size; unsigned long long start_time = 0ULL; struct timeval tv; opd_create_vmlinux(vmlinux, kernel_range); opd_create_xen(xenimage, xen_range); opd_buf_size = opd_read_fs_int("/dev/oprofile/", "buffer_size", 1); kernel_pointer_size = opd_read_fs_int("/dev/oprofile/", "pointer_size", 1); s_buf_bytesize = opd_buf_size * kernel_pointer_size; sbuf = xmalloc(s_buf_bytesize); opd_reread_module_info(); for (i = 0; i < OPD_MAX_STATS; i++) opd_stats[i] = 0; perfmon_init(); cookie_init(); sfile_init(); anon_init(); /* must be /after/ perfmon_init() at least */ if (atexit(clean_exit)) { perfmon_exit(); perror("oprofiled: couldn't set exit cleanup: "); exit(EXIT_FAILURE); } /* trigger kernel module setup before returning control to opcontrol */ opd_open_files(); gettimeofday(&tv, NULL); start_time = 0ULL; start_time = tv.tv_sec; sprintf(start_time_str, "%llu", start_time); }
int test_perfmonresult() { CpuInfo_t cpuinfo; int cpu = 0; int group; topology_init(); cpuinfo = get_cpuInfo(); if (cpuinfo->isIntel == 0) { topology_finalize(); return 1; } int ret = perfmon_init(1, &cpu); if (ret != 0) goto fail; ret = perfmon_addEventSet(eventset_ok); if (ret != 0) goto fail; group = ret; ret = perfmon_setupCounters(group); if (ret != 0) goto fail; ret = perfmon_startCounters(); if (ret != 0) goto fail; sleep(1); ret = perfmon_stopCounters(); if (ret != 0) goto fail; if ((perfmon_getResult(group,0,0) == 0)||(perfmon_getResult(group,1,0) == 0)) goto fail; if (perfmon_getTimeOfGroup(group) == 0) goto fail; perfmon_finalize(); topology_finalize(); return 1; fail: perfmon_finalize(); topology_finalize(); return 0; }
int test_perfmoninit_valid() { int cpu = 0; topology_init(); affinity_init(); int ret = perfmon_init(1, &cpu); if (ret != 0) goto fail; if (perfmon_getNumberOfGroups() != 0) goto fail; if (perfmon_getNumberOfThreads() != 1) goto fail; perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; fail: perfmon_finalize(); affinity_finalize(); topology_finalize(); return 0; }
int main(int argc, char* argv[]) { int i, j; int err; int* cpus; int gid; double result = 0.0; char estr[] = "INSTR_RETIRED_ANY:FIXC0,CPU_CLK_UNHALTED_CORE:FIXC1,CPU_CLK_UNHALTED_REF:FIXC2,TEMP_CORE:TMP0"; // Load the topology module and print some values. err = topology_init(); if (err < 0) { printf("Failed to initialize LIKWID's topology module\n"); return 1; } // CpuInfo_t contains global information like name, CPU family, ... CpuInfo_t info = get_cpuInfo(); // CpuTopology_t contains information about the topology of the CPUs. CpuTopology_t topo = get_cpuTopology(); // Create affinity domains. Commonly only needed when reading Uncore counters //affinity_init(); printf("Likwid example on a %s with %d CPUs\n", info->name, topo->numHWThreads); cpus = (int*)malloc(topo->numHWThreads * sizeof(int)); if (!cpus) return 1; for (i=0;i<topo->numHWThreads;i++) { cpus[i] = topo->threadPool[i].apicId; } // Must be called before perfmon_init() but only if you want to use another // access mode as the pre-configured one. For direct access (0) you have to // be root. //accessClient_setaccessmode(0); // Initialize the perfmon module. err = perfmon_init(topo->numHWThreads, cpus); if (err < 0) { printf("Failed to initialize LIKWID's performance monitoring module\n"); topology_finalize(); return 1; } // Add eventset string to the perfmon module. gid = perfmon_addEventSet(estr); if (gid < 0) { printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr); perfmon_finalize(); topology_finalize(); return 1; } // Setup the eventset identified by group ID (gid). err = perfmon_setupCounters(gid); if (err < 0) { printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid); perfmon_finalize(); topology_finalize(); return 1; } // Start all counters in the previously set up event set. err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } // Perform something sleep(2); // Stop all counters in the previously started event set. err = perfmon_stopCounters(); if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } // Print the result of every thread/CPU for all events in estr. char* ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < topo->numHWThreads; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } // Uninitialize the perfmon module. perfmon_finalize(); // Uninitialize the topology module. topology_finalize(); return 0; }
int main (int argc, char** argv) { int socket_fd = -1; int optInfo = 0; int optClock = 0; int optStethoscope = 0; int optSockets = 0; double runtime; int hasDRAM = 0; int c; bstring argString; bstring eventString = bfromcstr("CLOCK"); int numSockets=1; int numThreads=0; int threadsSockets[MAX_NUM_NODES*2]; int threads[MAX_NUM_THREADS]; threadsSockets[0] = 0; if (argc == 1) { HELP_MSG; exit (EXIT_SUCCESS); } while ((c = getopt (argc, argv, "+c:hiM:ps:v")) != -1) { switch (c) { case 'c': CHECK_OPTION_STRING; numSockets = bstr_to_cpuset_physical((uint32_t*) threadsSockets, argString); bdestroy(argString); optSockets = 1; break; case 'h': HELP_MSG; exit (EXIT_SUCCESS); case 'i': optInfo = 1; break; case 'M': /* Set MSR Access mode */ CHECK_OPTION_STRING; accessClient_setaccessmode(str2int((char*) argString->data)); bdestroy(argString); break; case 'p': optClock = 1; break; case 's': CHECK_OPTION_STRING; optStethoscope = str2int((char*) argString->data); bdestroy(argString); break; case 'v': VERSION_MSG; exit (EXIT_SUCCESS); case '?': if (optopt == 's' || optopt == 'M' || optopt == 'c') { HELP_MSG; } else if (isprint (optopt)) { fprintf (stderr, "Unknown option `-%c'.\n", optopt); } else { fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); } exit( EXIT_FAILURE); default: HELP_MSG; exit (EXIT_SUCCESS); } } if (!lock_check()) { fprintf(stderr,"Access to performance counters is locked.\n"); exit(EXIT_FAILURE); } if (optClock && optind == argc) { fprintf(stderr,"Commandline option -p requires an executable.\n"); exit(EXIT_FAILURE); } if (optSockets && !optStethoscope && optind == argc) { fprintf(stderr,"Commandline option -c requires an executable if not used in combination with -s.\n"); exit(EXIT_FAILURE); } if (cpuid_init() == EXIT_FAILURE) { fprintf(stderr, "CPU not supported\n"); exit(EXIT_FAILURE); } if (numSockets > cpuid_topology.numSockets) { fprintf(stderr, "System has only %d sockets but %d are given on commandline\n", cpuid_topology.numSockets, numSockets); exit(EXIT_FAILURE); } numa_init(); /* consider NUMA node as power unit for the moment */ accessClient_init(&socket_fd); msr_init(socket_fd); timer_init(); /* check for supported processors */ if ((cpuid_info.model == SANDYBRIDGE_EP) || (cpuid_info.model == SANDYBRIDGE) || (cpuid_info.model == IVYBRIDGE) || (cpuid_info.model == IVYBRIDGE_EP) || (cpuid_info.model == HASWELL) || (cpuid_info.model == NEHALEM_BLOOMFIELD) || (cpuid_info.model == NEHALEM_LYNNFIELD) || (cpuid_info.model == NEHALEM_WESTMERE)) { power_init(numa_info.nodes[0].processors[0]); } else { fprintf (stderr, "Query Turbo Mode only supported on Intel Nehalem/Westmere/SandyBridge/IvyBridge/Haswell processors!\n"); exit(EXIT_FAILURE); } double clock = (double) timer_getCpuClock(); printf(HLINE); printf("CPU name:\t%s \n",cpuid_info.name); printf("CPU clock:\t%3.2f GHz \n", (float) clock * 1.E-09); printf(HLINE); if (optInfo) { if (power_info.turbo.numSteps != 0) { printf("Base clock:\t%.2f MHz \n", power_info.baseFrequency ); printf("Minimal clock:\t%.2f MHz \n", power_info.minFrequency ); printf("Turbo Boost Steps:\n"); for (int i=0; i < power_info.turbo.numSteps; i++ ) { printf("C%d %.2f MHz \n",i+1, power_info.turbo.steps[i] ); } } printf(HLINE); } if (cpuid_info.model == SANDYBRIDGE_EP) { hasDRAM = 1; } else if ((cpuid_info.model != SANDYBRIDGE) && (cpuid_info.model != SANDYBRIDGE_EP) && (cpuid_info.model != IVYBRIDGE) && (cpuid_info.model != IVYBRIDGE_EP) && (cpuid_info.model != HASWELL)) { fprintf (stderr, "RAPL not supported on this processor!\n"); exit(EXIT_FAILURE); } if (optInfo) { printf("Thermal Spec Power: %g Watts \n", power_info.tdp ); printf("Minimum Power: %g Watts \n", power_info.minPower); printf("Maximum Power: %g Watts \n", power_info.maxPower); printf("Maximum Time Window: %g micro sec \n", power_info.maxTimeWindow); printf(HLINE); exit(EXIT_SUCCESS); } if (optClock) { affinity_init(); argString = bformat("S%u:0-%u", threadsSockets[0], cpuid_topology.numCoresPerSocket-1); for (int i=1; i<numSockets; i++) { bstring tExpr = bformat("@S%u:0-%u", threadsSockets[i], cpuid_topology.numCoresPerSocket-1); bconcat(argString, tExpr); } numThreads = bstr_to_cpuset(threads, argString); bdestroy(argString); perfmon_init(numThreads, threads, stdout); perfmon_setupEventSet(eventString, NULL); } { PowerData pDataPkg[MAX_NUM_NODES*2]; PowerData pDataDram[MAX_NUM_NODES*2]; printf("Measure on sockets: %d", threadsSockets[0]); for (int i=1; i<numSockets; i++) { printf(", %d", threadsSockets[i]); } printf("\n"); if (optStethoscope) { if (optClock) { perfmon_startCounters(); } else { for (int i=0; i<numSockets; i++) { int cpuId = numa_info.nodes[threadsSockets[i]].processors[0]; if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM); power_start(pDataPkg+i, cpuId, PKG); } } sleep(optStethoscope); if (optClock) { perfmon_stopCounters(); perfmon_printCounterResults(); perfmon_finalize(); } else { for (int i=0; i<numSockets; i++) { int cpuId = numa_info.nodes[threadsSockets[i]].processors[0]; power_stop(pDataPkg+i, cpuId, PKG); if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM); } } runtime = (double) optStethoscope; } else { TimerData time; argv += optind; bstring exeString = bfromcstr(argv[0]); for (int i=1; i<(argc-optind); i++) { bconchar(exeString, ' '); bcatcstr(exeString, argv[i]); } printf("%s\n",bdata(exeString)); if (optClock) { perfmon_startCounters(); } else { for (int i=0; i<numSockets; i++) { int cpuId = numa_info.nodes[threadsSockets[i]].processors[0]; if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM); power_start(pDataPkg+i, cpuId, PKG); } timer_start(&time); } if (system(bdata(exeString)) == EOF) { fprintf(stderr, "Failed to execute %s!\n", bdata(exeString)); exit(EXIT_FAILURE); } if (optClock) { perfmon_stopCounters(); perfmon_printCounterResults(); perfmon_finalize(); } else { timer_stop(&time); for (int i=0; i<numSockets; i++) { int cpuId = numa_info.nodes[threadsSockets[i]].processors[0]; power_stop(pDataPkg+i, cpuId, PKG); if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM); } runtime = timer_print(&time); } } if (!optClock) { printf("Runtime: %g second \n",runtime); printf(HLINE); for (int i=0; i<numSockets; i++) { printf("Socket %d\n",threadsSockets[i]); printf("Domain: PKG \n"); printf("Energy consumed: %g Joules \n", power_printEnergy(pDataPkg+i)); printf("Power consumed: %g Watts \n", power_printEnergy(pDataPkg+i) / runtime ); if (hasDRAM) { printf("Domain: DRAM \n"); printf("Energy consumed: %g Joules \n", power_printEnergy(pDataDram+i)); printf("Power consumed: %g Watts \n", power_printEnergy(pDataDram+i) / runtime ); } printf("\n"); } } } #if 0 if ( cpuid_hasFeature(TM2) ) { thermal_init(0); printf("Current core temperatures:\n"); for (uint32_t i = 0; i < cpuid_topology.numCoresPerSocket; i++ ) { printf("Core %d: %u C\n", numa_info.nodes[socketId].processors[i], thermal_read(numa_info.nodes[socketId].processors[i])); } } #endif msr_finalize(); return EXIT_SUCCESS; }
int main(int argn, char** argc) { int err, i ,j; int numCPUs = 0; int gid; DATATYPE *a,*b,*c,*d; TimeData timer; double triad_time, copy_time, scale_time, stream_time; char estr[1024]; double result, scalar = 3.0; char* ptr; if (argn != 3) { printf("Usage: %s <cpustr> <events>\n", argc[0]); return 1; } strcpy(estr, argc[2]); allocate_vector(&a, SIZE); allocate_vector(&b, SIZE); allocate_vector(&c, SIZE); allocate_vector(&d, SIZE); err = topology_init(); if (err < 0) { printf("Failed to initialize LIKWID's topology module\n"); return 1; } CpuTopology_t topo = get_cpuTopology(); affinity_init(); int* cpus = (int*)malloc(topo->numHWThreads * sizeof(int)); if (!cpus) return 1; numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads); omp_set_num_threads(numCPUs); err = perfmon_init(numCPUs, cpus); if (err < 0) { printf("Failed to initialize LIKWID's performance monitoring module\n"); affinity_finalize(); topology_finalize(); return 1; } gid = perfmon_addEventSet(estr); if (gid < 0) { printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr); perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; } err = perfmon_setupCounters(gid); if (err < 0) { printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid); perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; } #ifdef _OPENMP printf(HLINE); #pragma omp parallel { #pragma omp master { printf ("Number of Threads requested = %i\n",omp_get_num_threads()); } likwid_pinThread(cpus[omp_get_thread_num()]); printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu()); } #endif #pragma omp parallel for for (int j=0; j<SIZE; j++) { a[j] = 1.0; b[j] = 2.0; c[j] = 0.0; d[j] = 1.0; } err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("copy"); #pragma omp for for (int j=0; j<SIZE; j++) { c[j] = a[j]; } LIKWID_MARKER_STOP("copy"); } } time_stop(&timer); err = perfmon_stopCounters(); copy_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("scale"); #pragma omp for for (int j=0; j<SIZE; j++) { b[j] = scalar*c[j]; } LIKWID_MARKER_STOP("scale"); } } time_stop(&timer); err = perfmon_stopCounters(); scale_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("stream"); #pragma omp for for (int j=0; j<SIZE; j++) { c[j] = a[j] + b[j]; } LIKWID_MARKER_STOP("stream"); } } time_stop(&timer); err = perfmon_stopCounters(); stream_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("triad"); #pragma omp for for (int j=0; j<SIZE; j++) { a[j] = b[j] + c[j] * scalar; } LIKWID_MARKER_STOP("triad"); } } time_stop(&timer); err = perfmon_stopCounters(); triad_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(4*SIZE*sizeof(DATATYPE)), triad_time, 1E-6*((4*SIZE*sizeof(DATATYPE))/triad_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } perfmon_finalize(); affinity_finalize(); topology_finalize(); return 0; }
void likwid_markerInit(void) { int i; int verbosity; int setinit = 0; bstring bThreadStr; bstring bEventStr; struct bstrList* threadTokens; struct bstrList* eventStrings; char* modeStr = getenv("LIKWID_MODE"); char* eventStr = getenv("LIKWID_EVENTS"); char* cThreadStr = getenv("LIKWID_THREADS"); char* filepath = getenv("LIKWID_FILEPATH"); char* perfpid = getenv("LIKWID_PERF_EXECPID"); char execpid[20]; /* Dirty hack to avoid nonnull warnings */ int (*ownatoi)(const char*); ownatoi = &atoi; if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL) && likwid_init == 0) { setinit = 1; } else if (likwid_init == 0) { fprintf(stderr, "Running without Marker API. Activate Marker API with -m on commandline.\n"); return; } else { return; } if (!lock_check()) { fprintf(stderr,"Access to performance counters is locked.\n"); exit(EXIT_FAILURE); } topology_init(); numa_init(); affinity_init(); hashTable_init(); //#ifndef LIKWID_USE_PERFEVENT HPMmode(atoi(modeStr)); //#endif if (getenv("LIKWID_DEBUG") != NULL) { perfmon_verbosity = atoi(getenv("LIKWID_DEBUG")); verbosity = perfmon_verbosity; } bThreadStr = bfromcstr(cThreadStr); threadTokens = bsplit(bThreadStr,','); num_cpus = threadTokens->qty; for (i=0; i<num_cpus; i++) { threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i])); } bdestroy(bThreadStr); bstrListDestroy(threadTokens); if (getenv("LIKWID_PIN") != NULL) { likwid_pinThread(threads2Cpu[0]); if (getenv("OMP_NUM_THREADS") != NULL) { if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus) { use_locks = 1; } } if (getenv("CILK_NWORKERS") != NULL) { if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus) { use_locks = 1; } } } #ifdef LIKWID_USE_PERFEVENT if (perfpid != NULL) { snprintf(execpid, 19, "%d", getpid()); setenv("LIKWID_PERF_PID", execpid, 1); char* perfflags = getenv("LIKWID_PERF_FLAGS"); if (perfflags) { setenv("LIKWID_PERF_FLAGS", getenv("LIKWID_PERF_FLAGS"), 1); } } #endif i = perfmon_init(num_cpus, threads2Cpu); if (i<0) { //fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n"); return; } bEventStr = bfromcstr(eventStr); eventStrings = bsplit(bEventStr,'|'); numberOfGroups = eventStrings->qty; groups = malloc(numberOfGroups * sizeof(int)); if (!groups) { fprintf(stderr,"Cannot allocate space for group handling.\n"); bstrListDestroy(eventStrings); exit(EXIT_FAILURE); } for (i=0; i<eventStrings->qty; i++) { groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i])); } bstrListDestroy(eventStrings); bdestroy(bEventStr); for (i=0; i<num_cpus; i++) { hashTable_initThread(threads2Cpu[i]); for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++) { groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE; groupSet->groups[groups[0]].state = STATE_START; } } if (setinit) { likwid_init = 1; } threads2Pthread[registered_cpus] = pthread_self(); registered_cpus++; groupSet->activeGroup = 0; perfmon_setupCounters(groupSet->activeGroup); perfmon_startCounters(); }
int test_perfmonperfgroup() { CpuInfo_t cpuinfo; int i; int cpu = 0; topology_init(); cpuinfo = get_cpuInfo(); int ret = perfmon_init(1, &cpu); if (ret != 0) { printf("Perfmon init failed\n"); goto fail; } char** glist = NULL; char** slist = NULL; char** llist = NULL; ret = perfmon_getGroups(&glist, &slist, &llist); if (ret <= 0) { goto fail; } ret = perfmon_addEventSet(glist[0]); if (ret != 0) { printf("Perfmon addEventSet(%s) failed\n", glist[0]); goto fail; } if (perfmon_getNumberOfEvents(ret) == 0) { printf("Perfmon number of events == 0\n"); goto fail; } if (perfmon_getNumberOfMetrics(ret) == 0) { printf("Perfmon number of metrics == 0\n"); goto fail; } for (i=0; i<perfmon_getNumberOfEvents(ret); i++) { if (strcmp(perfmon_getEventName(ret, i), "") == 0) goto fail; if (strcmp(perfmon_getCounterName(ret, i), "") == 0) goto fail; } if (strcmp(perfmon_getGroupName(ret), "Custom") == 0) { goto fail; } if (strcmp(perfmon_getGroupInfoShort(ret), "Custom") == 0) { goto fail; } if (strcmp(perfmon_getGroupInfoLong(ret), "Custom") == 0) { goto fail; } if (perfmon_getLastTimeOfGroup(ret) != 0) { goto fail; } if (perfmon_getTimeOfGroup(ret) != 0) { goto fail; } for (i=0; i<perfmon_getNumberOfMetrics(ret); i++) { if (strcmp(perfmon_getMetricName(ret, i), "") == 0) goto fail; if (perfmon_getMetric(ret, i, 0) != 0.0) goto fail; } free(glist); free(slist); free(llist); perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; fail: if (glist) free(glist); if (slist) free(slist); if (llist) free(llist); perfmon_finalize(); affinity_finalize(); topology_finalize(); return 0; }
int test_perfmoncustomgroup() { CpuInfo_t cpuinfo; int cpu = 0; topology_init(); cpuinfo = get_cpuInfo(); int ret = perfmon_init(1, &cpu); if (ret != 0) { printf("Perfmon init failed\n"); goto fail; } ret = perfmon_addEventSet(eventset_ok); if (ret != 0) { printf("Perfmon addEventSet(ok) failed\n"); goto fail; } if (perfmon_getNumberOfEvents(ret) != 3) { printf("Perfmon number of events != 3\n"); goto fail; } if (perfmon_getNumberOfMetrics(ret) != 0) { printf("Perfmon number of metrics != 0\n"); goto fail; } if (strcmp(perfmon_getEventName(ret, 0), event1_ok) != 0) { goto fail; } if (strcmp(perfmon_getEventName(ret, 1), event2_ok) != 0) { goto fail; } if (strcmp(perfmon_getEventName(ret, 2), event3_ok) != 0) { goto fail; } if (strcmp(perfmon_getCounterName(ret, 0), ctr1_ok) != 0) { goto fail; } if (strcmp(perfmon_getCounterName(ret, 1), ctr2_ok) != 0) { goto fail; } if (strcmp(perfmon_getCounterName(ret, 2), ctr3_ok) != 0) { goto fail; } if (strcmp(perfmon_getGroupName(ret), "Custom") != 0) { goto fail; } if (strcmp(perfmon_getGroupInfoShort(ret), "Custom") != 0) { goto fail; } if (strcmp(perfmon_getGroupInfoLong(ret), "Custom") != 0) { goto fail; } if (perfmon_getLastTimeOfGroup(ret) != 0) { goto fail; } perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; fail: perfmon_finalize(); affinity_finalize(); topology_finalize(); return 0; }
int test_perfmonaddeventset() { char eventset_fail1[] = "INSTR_RETIRED.ANY:FIXC0"; char eventset_fail2[] = "INSTR_RETIRED-ANY:FIXC0"; CpuInfo_t cpuinfo; int cpu = 0; topology_init(); cpuinfo = get_cpuInfo(); if (cpuinfo->isIntel == 0) { topology_finalize(); return 1; } int ret = perfmon_init(1, &cpu); if (ret != 0) { printf("Perfmon init failed\n"); goto fail; } if (perfmon_getNumberOfGroups() != 0) { printf("Perfmon number of groups != 0\n"); goto fail; } if (perfmon_getNumberOfThreads() != 1) { printf("Perfmon number of threads != 1\n"); goto fail; } if (perfmon_getIdOfActiveGroup() != -1) { printf("Perfmon id of active group != -1\n"); goto fail; } ret = perfmon_addEventSet(eventset_ok); if (ret != 0) { printf("Perfmon addEventSet(ok) failed\n"); goto fail; } if (perfmon_getNumberOfGroups() != 1) { printf("Perfmon number of groups != 1\n"); goto fail; } if (perfmon_getNumberOfEvents(ret) != 3) { printf("Perfmon number of events != 3\n"); goto fail; } if (perfmon_getIdOfActiveGroup() != -1) { printf("Perfmon id of active group != -1\n"); goto fail; } ret = perfmon_addEventSet(eventset_option); if (ret != 1) { printf("Perfmon addEventSet(options) failed\n"); goto fail; } if (perfmon_getNumberOfGroups() != 2) { printf("Perfmon number of groups != 2\n"); goto fail; } if (perfmon_getNumberOfEvents(ret) != 3) { printf("Perfmon number of events != 3\n"); goto fail; } if (perfmon_getIdOfActiveGroup() != -1) { printf("Perfmon id of active group != -1\n"); goto fail; } ret = perfmon_addEventSet(eventset_fail1); if (ret >= 0) { printf("Perfmon addEventSet(fail1) failed\n"); goto fail; } if (perfmon_getNumberOfGroups() != 2) { printf("Perfmon number of groups != 2\n"); goto fail; } ret = perfmon_addEventSet(eventset_fail2); if (ret >= 0) { printf("Perfmon addEventSet(fail2) failed\n"); goto fail; } if (perfmon_getNumberOfGroups() != 2) { printf("Perfmon number of groups != 2\n"); goto fail; } if (perfmon_getIdOfActiveGroup() != -1) { printf("Perfmon id of active group != -1\n"); goto fail; } perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; fail: perfmon_finalize(); affinity_finalize(); topology_finalize(); return 0; }
void likwid_markerInit(void) { int i; int verbosity; bstring bThreadStr; bstring bEventStr; struct bstrList* threadTokens; struct bstrList* eventStrings; char* modeStr = getenv("LIKWID_MODE"); char* eventStr = getenv("LIKWID_EVENTS"); char* cThreadStr = getenv("LIKWID_THREADS"); char* filepath = getenv("LIKWID_FILEPATH"); /* Dirty hack to avoid nonnull warnings */ int (*ownatoi)(const char*); ownatoi = &atoi; if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL)) { likwid_init = 1; } else if (likwid_init == 0) { fprintf(stderr, "Cannot initalize LIKWID marker API, environment variables are not set\n"); fprintf(stderr, "You have to set the -m commandline switch for likwid-perfctr\n"); return; } else { return; } if (!lock_check()) { fprintf(stderr,"Access to performance counters is locked.\n"); exit(EXIT_FAILURE); } topology_init(); numa_init(); affinity_init(); hashTable_init(); for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT; HPMmode(atoi(modeStr)); if (getenv("LIKWID_DEBUG") != NULL) { perfmon_verbosity = atoi(getenv("LIKWID_DEBUG")); verbosity = perfmon_verbosity; } bThreadStr = bfromcstr(cThreadStr); threadTokens = bstrListCreate(); threadTokens = bsplit(bThreadStr,','); num_cpus = threadTokens->qty; for (i=0; i<num_cpus; i++) { threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i])); } bdestroy(bThreadStr); bstrListDestroy(threadTokens); if (getenv("LIKWID_PIN") != NULL) { likwid_pinThread(threads2Cpu[0]); if (getenv("OMP_NUM_THREADS") != NULL) { if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus) { use_locks = 1; } } if (getenv("CILK_NWORKERS") != NULL) { if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus) { use_locks = 1; } } } i = perfmon_init(num_cpus, threads2Cpu); if (i<0) { fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n"); return; } bEventStr = bfromcstr(eventStr); eventStrings = bstrListCreate(); eventStrings = bsplit(bEventStr,'|'); numberOfGroups = eventStrings->qty; groups = malloc(numberOfGroups * sizeof(int)); if (!groups) { fprintf(stderr,"Cannot allocate space for group handling.\n"); bstrListDestroy(eventStrings); exit(EXIT_FAILURE); } for (i=0; i<eventStrings->qty; i++) { groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i])); } bstrListDestroy(eventStrings); bdestroy(bEventStr); for (i=0; i<num_cpus; i++) { hashTable_initThread(threads2Cpu[i]); for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++) { groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE; } } groupSet->activeGroup = 0; }