void likwid_markerThreadInit(void) { int myID = 0, i = 0; pthread_t t; if ( !likwid_init ) { return; } pthread_mutex_lock(&globalLock); t = pthread_self(); for (i=0; i<registered_cpus; i++) { if (pthread_equal(t, threads2Pthread[i])) { t = 0; } } if (t != 0) { threads2Pthread[registered_cpus] = t; myID = registered_cpus++; } pthread_mutex_unlock(&globalLock); if (getenv("LIKWID_PIN") != NULL) { cpu_set_t cpuset; CPU_ZERO(&cpuset); sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpuset); if ((CPU_COUNT(&cpuset) > 1) || (likwid_getProcessorId() != threads2Cpu[myID % num_cpus])) { likwid_pinThread(threads2Cpu[myID % num_cpus]); DEBUG_PRINT(DEBUGLEV_DEVELOP, Pin thread %lu to CPU %d currently %d, gettid(), threads2Cpu[myID % num_cpus], sched_getcpu()); } } }
void likwid_markerThreadInit(void) { int myID; if ( !likwid_init ) { return; } pthread_mutex_lock(&globalLock); myID = registered_cpus++; pthread_mutex_unlock(&globalLock); if (getenv("LIKWID_PIN") != NULL) { cpu_set_t cpuset; CPU_ZERO(&cpuset); sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpuset); if ((CPU_COUNT(&cpuset) > 1) || (likwid_getProcessorId() != threads2Cpu[myID % num_cpus])) { likwid_pinThread(threads2Cpu[myID % num_cpus]); DEBUG_PRINT(DEBUGLEV_DEVELOP, "Pin thread %lu to CPU %d\n", gettid(), threads2Cpu[myID % num_cpus]); } } }
void likwid_markerInit(void) { int i; int verbosity; bstring bThreadStr; bstring bEventStr; struct bstrList* threadTokens; struct bstrList* eventStrings; char* modeStr = getenv("LIKWID_MODE"); char* eventStr = getenv("LIKWID_EVENTS"); char* cThreadStr = getenv("LIKWID_THREADS"); char* filepath = getenv("LIKWID_FILEPATH"); /* Dirty hack to avoid nonnull warnings */ int (*ownatoi)(const char*); ownatoi = &atoi; if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL)) { likwid_init = 1; } else if (likwid_init == 0) { fprintf(stderr, "Cannot initalize LIKWID marker API, environment variables are not set\n"); fprintf(stderr, "You have to set the -m commandline switch for likwid-perfctr\n"); return; } else { return; } if (!lock_check()) { fprintf(stderr,"Access to performance counters is locked.\n"); exit(EXIT_FAILURE); } topology_init(); numa_init(); affinity_init(); hashTable_init(); for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT; HPMmode(atoi(modeStr)); if (getenv("LIKWID_DEBUG") != NULL) { perfmon_verbosity = atoi(getenv("LIKWID_DEBUG")); verbosity = perfmon_verbosity; } bThreadStr = bfromcstr(cThreadStr); threadTokens = bstrListCreate(); threadTokens = bsplit(bThreadStr,','); num_cpus = threadTokens->qty; for (i=0; i<num_cpus; i++) { threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i])); } bdestroy(bThreadStr); bstrListDestroy(threadTokens); if (getenv("LIKWID_PIN") != NULL) { likwid_pinThread(threads2Cpu[0]); if (getenv("OMP_NUM_THREADS") != NULL) { if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus) { use_locks = 1; } } if (getenv("CILK_NWORKERS") != NULL) { if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus) { use_locks = 1; } } } i = perfmon_init(num_cpus, threads2Cpu); if (i<0) { fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n"); return; } bEventStr = bfromcstr(eventStr); eventStrings = bstrListCreate(); eventStrings = bsplit(bEventStr,'|'); numberOfGroups = eventStrings->qty; groups = malloc(numberOfGroups * sizeof(int)); if (!groups) { fprintf(stderr,"Cannot allocate space for group handling.\n"); bstrListDestroy(eventStrings); exit(EXIT_FAILURE); } for (i=0; i<eventStrings->qty; i++) { groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i])); } bstrListDestroy(eventStrings); bdestroy(bEventStr); for (i=0; i<num_cpus; i++) { hashTable_initThread(threads2Cpu[i]); for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++) { groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE; } } groupSet->activeGroup = 0; }
void likwid_markerInit(void) { int i; int verbosity; int setinit = 0; bstring bThreadStr; bstring bEventStr; struct bstrList* threadTokens; struct bstrList* eventStrings; char* modeStr = getenv("LIKWID_MODE"); char* eventStr = getenv("LIKWID_EVENTS"); char* cThreadStr = getenv("LIKWID_THREADS"); char* filepath = getenv("LIKWID_FILEPATH"); char* perfpid = getenv("LIKWID_PERF_EXECPID"); char execpid[20]; /* Dirty hack to avoid nonnull warnings */ int (*ownatoi)(const char*); ownatoi = &atoi; if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL) && likwid_init == 0) { setinit = 1; } else if (likwid_init == 0) { fprintf(stderr, "Running without Marker API. Activate Marker API with -m on commandline.\n"); return; } else { return; } if (!lock_check()) { fprintf(stderr,"Access to performance counters is locked.\n"); exit(EXIT_FAILURE); } topology_init(); numa_init(); affinity_init(); hashTable_init(); //#ifndef LIKWID_USE_PERFEVENT HPMmode(atoi(modeStr)); //#endif if (getenv("LIKWID_DEBUG") != NULL) { perfmon_verbosity = atoi(getenv("LIKWID_DEBUG")); verbosity = perfmon_verbosity; } bThreadStr = bfromcstr(cThreadStr); threadTokens = bsplit(bThreadStr,','); num_cpus = threadTokens->qty; for (i=0; i<num_cpus; i++) { threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i])); } bdestroy(bThreadStr); bstrListDestroy(threadTokens); if (getenv("LIKWID_PIN") != NULL) { likwid_pinThread(threads2Cpu[0]); if (getenv("OMP_NUM_THREADS") != NULL) { if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus) { use_locks = 1; } } if (getenv("CILK_NWORKERS") != NULL) { if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus) { use_locks = 1; } } } #ifdef LIKWID_USE_PERFEVENT if (perfpid != NULL) { snprintf(execpid, 19, "%d", getpid()); setenv("LIKWID_PERF_PID", execpid, 1); char* perfflags = getenv("LIKWID_PERF_FLAGS"); if (perfflags) { setenv("LIKWID_PERF_FLAGS", getenv("LIKWID_PERF_FLAGS"), 1); } } #endif i = perfmon_init(num_cpus, threads2Cpu); if (i<0) { //fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n"); return; } bEventStr = bfromcstr(eventStr); eventStrings = bsplit(bEventStr,'|'); numberOfGroups = eventStrings->qty; groups = malloc(numberOfGroups * sizeof(int)); if (!groups) { fprintf(stderr,"Cannot allocate space for group handling.\n"); bstrListDestroy(eventStrings); exit(EXIT_FAILURE); } for (i=0; i<eventStrings->qty; i++) { groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i])); } bstrListDestroy(eventStrings); bdestroy(bEventStr); for (i=0; i<num_cpus; i++) { hashTable_initThread(threads2Cpu[i]); for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++) { groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE; groupSet->groups[groups[0]].state = STATE_START; } } if (setinit) { likwid_init = 1; } threads2Pthread[registered_cpus] = pthread_self(); registered_cpus++; groupSet->activeGroup = 0; perfmon_setupCounters(groupSet->activeGroup); perfmon_startCounters(); }
int main(int argn, char** argc) { int err, i ,j; int numCPUs = 0; int gid; DATATYPE *a,*b,*c,*d; TimeData timer; double triad_time, copy_time, scale_time, stream_time; char estr[1024]; double result, scalar = 3.0; char* ptr; if (argn != 3) { printf("Usage: %s <cpustr> <events>\n", argc[0]); return 1; } strcpy(estr, argc[2]); allocate_vector(&a, SIZE); allocate_vector(&b, SIZE); allocate_vector(&c, SIZE); allocate_vector(&d, SIZE); err = topology_init(); if (err < 0) { printf("Failed to initialize LIKWID's topology module\n"); return 1; } CpuTopology_t topo = get_cpuTopology(); affinity_init(); int* cpus = (int*)malloc(topo->numHWThreads * sizeof(int)); if (!cpus) return 1; numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads); omp_set_num_threads(numCPUs); err = perfmon_init(numCPUs, cpus); if (err < 0) { printf("Failed to initialize LIKWID's performance monitoring module\n"); affinity_finalize(); topology_finalize(); return 1; } gid = perfmon_addEventSet(estr); if (gid < 0) { printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr); perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; } err = perfmon_setupCounters(gid); if (err < 0) { printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid); perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; } #ifdef _OPENMP printf(HLINE); #pragma omp parallel { #pragma omp master { printf ("Number of Threads requested = %i\n",omp_get_num_threads()); } likwid_pinThread(cpus[omp_get_thread_num()]); printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu()); } #endif #pragma omp parallel for for (int j=0; j<SIZE; j++) { a[j] = 1.0; b[j] = 2.0; c[j] = 0.0; d[j] = 1.0; } err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("copy"); #pragma omp for for (int j=0; j<SIZE; j++) { c[j] = a[j]; } LIKWID_MARKER_STOP("copy"); } } time_stop(&timer); err = perfmon_stopCounters(); copy_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("scale"); #pragma omp for for (int j=0; j<SIZE; j++) { b[j] = scalar*c[j]; } LIKWID_MARKER_STOP("scale"); } } time_stop(&timer); err = perfmon_stopCounters(); scale_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("stream"); #pragma omp for for (int j=0; j<SIZE; j++) { c[j] = a[j] + b[j]; } LIKWID_MARKER_STOP("stream"); } } time_stop(&timer); err = perfmon_stopCounters(); stream_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("triad"); #pragma omp for for (int j=0; j<SIZE; j++) { a[j] = b[j] + c[j] * scalar; } LIKWID_MARKER_STOP("triad"); } } time_stop(&timer); err = perfmon_stopCounters(); triad_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(4*SIZE*sizeof(DATATYPE)), triad_time, 1E-6*((4*SIZE*sizeof(DATATYPE))/triad_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } perfmon_finalize(); affinity_finalize(); topology_finalize(); return 0; }