main(int argc, char **argv) { int rank; char* host; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); host = (char*) malloc(HOST_NAME_MAX * sizeof(char)); gethostname(host,HOST_NAME_MAX); MASTER(MPI started); MPI_Barrier(MPI_COMM_WORLD); printf("Process with rank %d running on Node %s Core %d\n",rank ,host, likwid_getProcessorId()); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); MASTER(Enter OpenMP parallel region); MPI_Barrier(MPI_COMM_WORLD); #pragma omp parallel { int coreId = likwid_getProcessorId(); #pragma omp critical { printf ("Rank %d Thread %d running on core %d \n",rank,omp_get_thread_num(), coreId); fflush(stdout); } } sleep(2); MPI_Finalize(); }
int main(int argc, char* argv[]) { int i, j ; double alpha = 3.14; /* Initialize */ for (i=0; i<SIZE; i++) { a[i] = 1.0/(double) i; b[i] = 1.0; c[i] = (double) i; } LIKWID_MARKER_INIT; // likwid_pinProcess(2); printf("Main running on core %d\n", likwid_getProcessorId()); /****************************************************/ #pragma omp parallel { LIKWID_MARKER_THREADINIT; char* label = malloc(40*sizeof(char)); int threadId = omp_get_thread_num(); // likwid_pinThread(threadId); printf("Thread running on core %d\n", likwid_getProcessorId()); for (int counter=1; counter< 3; counter++) { sprintf(label,"plain-%d",counter); #pragma omp barrier LIKWID_MARKER_START(label); for (j = 0; j < counter * threadId; j++) { for (i = 0; i < SIZE; i++) { a[i] = b[i] + alpha * c[i]; sum += a[i]; } } #pragma omp barrier LIKWID_MARKER_STOP(label); printf("Flops performed thread %d region %s: %g\n",threadId, label,(double)counter*threadId*SIZE*3); } free(label); } /****************************************************/ LIKWID_MARKER_CLOSE; printf( "OK, dofp result = %e\n", sum); }
/*
 * Look up the result record for region `label` in the hash table that
 * belongs to the processor the caller is currently running on.
 *
 * The per-processor ThreadList entry and the per-region record are created
 * on first use. The record is handed back through `resEntry`.
 *
 * Returns the processor id reported by likwid_getProcessorId().
 */
int hashTable_get(bstring label, LikwidThreadResults** resEntry)
{
    const int cpu = likwid_getProcessorId();
    ThreadList* perCpu = threadList[cpu];

    /* First call from this processor: set up its list entry and table. */
    if (perCpu == NULL)
    {
        perCpu = malloc(sizeof *perCpu);
        perCpu->tid = pthread_self();
        perCpu->coreId = cpu;
        perCpu->hashTable = g_hash_table_new(g_str_hash, g_str_equal);
        threadList[cpu] = perCpu;
    }

    LikwidThreadResults* region =
        g_hash_table_lookup(perCpu->hashTable, (gpointer) bdata(label));

    /* Unknown region: create a zeroed record and register it under a
     * duplicated copy of the label string. */
    if (region == NULL)
    {
        region = malloc(sizeof *region);
        region->label = bstrcpy(label);
        region->time = 0.0;
        region->count = 0;

        int idx = 0;
        while (idx < NUM_PMC)
        {
            region->PMcounters[idx] = 0.0;
            idx++;
        }

        g_hash_table_insert(perCpu->hashTable,
                (gpointer) g_strdup(bdata(label)),
                (gpointer) region);
    }

    *resEntry = region;
    return cpu;
}
/*
 * Per-thread marker initialization.
 *
 * Does nothing unless the marker API was initialized. When the access mode
 * is not DAEMON_AM_DIRECT and the current processor has no socket yet
 * (slot holds -1), accessClient_init() is called for that slot.
 */
void likwid_markerThreadInit(void)
{
    if ( !likwid_init )
    {
        return;
    }

    const int cpu = likwid_getProcessorId();

    if (accessClient_mode == DAEMON_AM_DIRECT)
    {
        return;
    }

    if (thread_socketFD[cpu] != -1)
    {
        return;
    }

    accessClient_init(&thread_socketFD[cpu]);
}
void likwid_markerThreadInit(void) { int myID = 0, i = 0; pthread_t t; if ( !likwid_init ) { return; } pthread_mutex_lock(&globalLock); t = pthread_self(); for (i=0; i<registered_cpus; i++) { if (pthread_equal(t, threads2Pthread[i])) { t = 0; } } if (t != 0) { threads2Pthread[registered_cpus] = t; myID = registered_cpus++; } pthread_mutex_unlock(&globalLock); if (getenv("LIKWID_PIN") != NULL) { cpu_set_t cpuset; CPU_ZERO(&cpuset); sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpuset); if ((CPU_COUNT(&cpuset) > 1) || (likwid_getProcessorId() != threads2Cpu[myID % num_cpus])) { likwid_pinThread(threads2Cpu[myID % num_cpus]); DEBUG_PRINT(DEBUGLEV_DEVELOP, Pin thread %lu to CPU %d currently %d, gettid(), threads2Cpu[myID % num_cpus], sched_getcpu()); } } }
void likwid_markerThreadInit(void) { int myID; if ( !likwid_init ) { return; } pthread_mutex_lock(&globalLock); myID = registered_cpus++; pthread_mutex_unlock(&globalLock); if (getenv("LIKWID_PIN") != NULL) { cpu_set_t cpuset; CPU_ZERO(&cpuset); sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpuset); if ((CPU_COUNT(&cpuset) > 1) || (likwid_getProcessorId() != threads2Cpu[myID % num_cpus])) { likwid_pinThread(threads2Cpu[myID % num_cpus]); DEBUG_PRINT(DEBUGLEV_DEVELOP, "Pin thread %lu to CPU %d\n", gettid(), threads2Cpu[myID % num_cpus]); } } }
void likwid_markerInit(void) { int cpuId = likwid_getProcessorId(); char* modeStr = getenv("LIKWID_MODE"); char* maskStr = getenv("LIKWID_MASK"); if ((modeStr != NULL) && (maskStr != NULL)) { likwid_init = 1; } else { return; } if (!lock_check()) { fprintf(stderr,"Access to performance counters is locked.\n"); exit(EXIT_FAILURE); } cpuid_init(); numa_init(); affinity_init(); timer_init(); hashTable_init(); for(int i=0; i<MAX_NUM_THREADS; i++) thread_socketFD[i] = -1; for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT; accessClient_mode = atoi(modeStr); str2BitMask(maskStr, &counterMask); if (accessClient_mode != DAEMON_AM_DIRECT) { accessClient_init(&thread_socketFD[cpuId]); } msr_init(thread_socketFD[cpuId]); thermal_init(cpuId); switch ( cpuid_info.family ) { case P6_FAMILY: switch ( cpuid_info.model ) { case PENTIUM_M_BANIAS: case PENTIUM_M_DOTHAN: perfmon_counter_map = pm_counter_map; perfmon_numCounters = NUM_COUNTERS_PM; perfmon_numCountersCore = NUM_COUNTERS_CORE_PM; break; case ATOM_45: case ATOM_32: case ATOM_22: case ATOM: perfmon_counter_map = core2_counter_map; perfmon_numCounters = NUM_COUNTERS_CORE2; perfmon_numCountersCore = NUM_COUNTERS_CORE_CORE2; break; case CORE_DUO: ERROR_PLAIN_PRINT(Unsupported Processor); break; case XEON_MP: case CORE2_65: case CORE2_45: perfmon_counter_map = core2_counter_map; perfmon_numCounters = NUM_COUNTERS_CORE2; perfmon_numCountersCore = NUM_COUNTERS_CORE_CORE2; break; case NEHALEM_EX: case WESTMERE_EX: perfmon_counter_map = westmereEX_counter_map; perfmon_numCounters = NUM_COUNTERS_WESTMEREEX; perfmon_numCountersCore = NUM_COUNTERS_CORE_WESTMEREEX; perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_WESTMEREEX; break; case NEHALEM_BLOOMFIELD: case NEHALEM_LYNNFIELD: case NEHALEM_WESTMERE_M: case NEHALEM_WESTMERE: perfmon_counter_map = nehalem_counter_map; perfmon_numCounters = NUM_COUNTERS_NEHALEM; perfmon_numCountersCore = NUM_COUNTERS_CORE_NEHALEM; perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_NEHALEM; 
break; case IVYBRIDGE: case IVYBRIDGE_EP: { int socket_fd = thread_socketFD[cpuId]; hasPCICounters = 1; power_init(0); /* FIXME Static coreId is dangerous */ pci_init(socket_fd); perfmon_counter_map = ivybridge_counter_map; perfmon_numCounters = NUM_COUNTERS_IVYBRIDGE; perfmon_numCountersCore = NUM_COUNTERS_CORE_IVYBRIDGE; perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_IVYBRIDGE; } break; case HASWELL: case HASWELL_EX: case HASWELL_M1: case HASWELL_M2: power_init(0); /* FIXME Static coreId is dangerous */ perfmon_counter_map = haswell_counter_map; perfmon_numCounters = NUM_COUNTERS_HASWELL; perfmon_numCountersCore = NUM_COUNTERS_CORE_HASWELL; break; case SANDYBRIDGE: case SANDYBRIDGE_EP: { int socket_fd = thread_socketFD[cpuId]; hasPCICounters = 1; power_init(0); /* FIXME Static coreId is dangerous */ pci_init(socket_fd); perfmon_counter_map = sandybridge_counter_map; perfmon_numCounters = NUM_COUNTERS_SANDYBRIDGE; perfmon_numCountersCore = NUM_COUNTERS_CORE_SANDYBRIDGE; perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_SANDYBRIDGE; } break; default: ERROR_PLAIN_PRINT(Unsupported Processor); break; } break; case MIC_FAMILY: switch ( cpuid_info.model ) { case XEON_PHI: perfmon_counter_map = phi_counter_map; perfmon_numCounters = NUM_COUNTERS_PHI; perfmon_numCountersCore = NUM_COUNTERS_CORE_PHI; break; default: ERROR_PLAIN_PRINT(Unsupported Processor); break; } break; case K8_FAMILY: perfmon_counter_map = k10_counter_map; perfmon_numCounters = NUM_COUNTERS_K10; perfmon_numCountersCore = NUM_COUNTERS_CORE_K10; break; case K10_FAMILY: perfmon_counter_map = k10_counter_map; perfmon_numCounters = NUM_COUNTERS_K10; perfmon_numCountersCore = NUM_COUNTERS_CORE_K10; break; case K15_FAMILY: perfmon_counter_map = interlagos_counter_map; perfmon_numCounters = NUM_COUNTERS_INTERLAGOS; perfmon_numCountersCore = NUM_COUNTERS_CORE_INTERLAGOS; break; case K16_FAMILY: perfmon_counter_map = kabini_counter_map; perfmon_numCounters = NUM_COUNTERS_KABINI; perfmon_numCountersCore = 
NUM_COUNTERS_CORE_KABINI; break; default: ERROR_PLAIN_PRINT(Unsupported Processor); break; } }