Пример #1
0
void
likwid_markerThreadInit(void)
{
    int myID = 0, i = 0;
    pthread_t t;
    if ( !likwid_init )
    {
        return;
    }

    pthread_mutex_lock(&globalLock);
    t = pthread_self();
    for (i=0; i<registered_cpus; i++)
    {
        if (pthread_equal(t, threads2Pthread[i]))
        {
            t = 0;
        }
    }
    if (t != 0)
    {
        threads2Pthread[registered_cpus] = t;
        myID = registered_cpus++;
    }
    pthread_mutex_unlock(&globalLock);

    if (getenv("LIKWID_PIN") != NULL)
    {
        cpu_set_t cpuset;
        CPU_ZERO(&cpuset);
        sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpuset);
        if ((CPU_COUNT(&cpuset) > 1) || (likwid_getProcessorId() != threads2Cpu[myID % num_cpus]))
        {
            likwid_pinThread(threads2Cpu[myID % num_cpus]);
            DEBUG_PRINT(DEBUGLEV_DEVELOP, Pin thread %lu to CPU %d currently %d, gettid(), threads2Cpu[myID % num_cpus], sched_getcpu());
        }
    }
}
Пример #2
0
void likwid_markerThreadInit(void)
{
    int myID;
    if ( !likwid_init )
    {
        return;
    }
    
    pthread_mutex_lock(&globalLock);
    myID = registered_cpus++;
    pthread_mutex_unlock(&globalLock);

    if (getenv("LIKWID_PIN") != NULL)
    {
        cpu_set_t cpuset;
        CPU_ZERO(&cpuset);
        sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpuset);
        if ((CPU_COUNT(&cpuset) > 1) || (likwid_getProcessorId() != threads2Cpu[myID % num_cpus]))
        {
            likwid_pinThread(threads2Cpu[myID % num_cpus]);
            DEBUG_PRINT(DEBUGLEV_DEVELOP, "Pin thread %lu to CPU %d\n", gettid(), threads2Cpu[myID % num_cpus]);
        }
    }
}
Пример #3
0
void likwid_markerInit(void)
{
    int i;
    int verbosity;
    bstring bThreadStr;
    bstring bEventStr;
    struct bstrList* threadTokens;
    struct bstrList* eventStrings;
    char* modeStr = getenv("LIKWID_MODE");
    char* eventStr = getenv("LIKWID_EVENTS");
    char* cThreadStr = getenv("LIKWID_THREADS");
    char* filepath = getenv("LIKWID_FILEPATH");
    /* Dirty hack to avoid nonnull warnings */
    int (*ownatoi)(const char*);
    ownatoi = &atoi;

    if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL))
    {
        likwid_init = 1;
    }
    else if (likwid_init == 0)
    {
        fprintf(stderr, "Cannot initalize LIKWID marker API, environment variables are not set\n");
        fprintf(stderr, "You have to set the -m commandline switch for likwid-perfctr\n");
        return;
    }
    else
    {
        return;
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    topology_init();
    numa_init();
    affinity_init();
    hashTable_init();

    for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT;

    HPMmode(atoi(modeStr));

    if (getenv("LIKWID_DEBUG") != NULL)
    {
        perfmon_verbosity = atoi(getenv("LIKWID_DEBUG"));
        verbosity = perfmon_verbosity;
    }

    bThreadStr = bfromcstr(cThreadStr);
    threadTokens = bstrListCreate();
    threadTokens = bsplit(bThreadStr,',');
    num_cpus = threadTokens->qty;
    for (i=0; i<num_cpus; i++)
    {
        threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i]));
    }
    bdestroy(bThreadStr);
    bstrListDestroy(threadTokens);
    
    if (getenv("LIKWID_PIN") != NULL)
    {
        likwid_pinThread(threads2Cpu[0]);
        if (getenv("OMP_NUM_THREADS") != NULL)
        {
            if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
        if (getenv("CILK_NWORKERS") != NULL)
        {
            if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
    }

    i = perfmon_init(num_cpus, threads2Cpu);
    if (i<0)
    {
        fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n");
        return;
    }

    bEventStr = bfromcstr(eventStr);
    eventStrings = bstrListCreate();
    eventStrings = bsplit(bEventStr,'|');
    numberOfGroups = eventStrings->qty;
    groups = malloc(numberOfGroups * sizeof(int));
    if (!groups)
    {
        fprintf(stderr,"Cannot allocate space for group handling.\n");
        bstrListDestroy(eventStrings);
        exit(EXIT_FAILURE);
    }
    for (i=0; i<eventStrings->qty; i++)
    {
        groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i]));
    }
    bstrListDestroy(eventStrings);
    bdestroy(bEventStr);

    for (i=0; i<num_cpus; i++)
    {
        hashTable_initThread(threads2Cpu[i]);
        for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++)
        {
            groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE;
        }
    }

    groupSet->activeGroup = 0;
}
Пример #4
0
void
likwid_markerInit(void)
{
    int i;
    int verbosity;
    int setinit = 0;
    bstring bThreadStr;
    bstring bEventStr;
    struct bstrList* threadTokens;
    struct bstrList* eventStrings;
    char* modeStr = getenv("LIKWID_MODE");
    char* eventStr = getenv("LIKWID_EVENTS");
    char* cThreadStr = getenv("LIKWID_THREADS");
    char* filepath = getenv("LIKWID_FILEPATH");
    char* perfpid = getenv("LIKWID_PERF_EXECPID");
    char execpid[20];
    /* Dirty hack to avoid nonnull warnings */
    int (*ownatoi)(const char*);
    ownatoi = &atoi;

    if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL) && likwid_init == 0)
    {
        setinit = 1;
    }
    else if (likwid_init == 0)
    {
        fprintf(stderr, "Running without Marker API. Activate Marker API with -m on commandline.\n");
        return;
    }
    else
    {
        return;
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    topology_init();
    numa_init();
    affinity_init();
    hashTable_init();

//#ifndef LIKWID_USE_PERFEVENT
    HPMmode(atoi(modeStr));
//#endif
    if (getenv("LIKWID_DEBUG") != NULL)
    {
        perfmon_verbosity = atoi(getenv("LIKWID_DEBUG"));
        verbosity = perfmon_verbosity;
    }

    bThreadStr = bfromcstr(cThreadStr);
    threadTokens = bsplit(bThreadStr,',');
    num_cpus = threadTokens->qty;
    for (i=0; i<num_cpus; i++)
    {
        threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i]));
    }
    bdestroy(bThreadStr);
    bstrListDestroy(threadTokens);

    if (getenv("LIKWID_PIN") != NULL)
    {
        likwid_pinThread(threads2Cpu[0]);
        if (getenv("OMP_NUM_THREADS") != NULL)
        {
            if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
        if (getenv("CILK_NWORKERS") != NULL)
        {
            if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
    }
#ifdef LIKWID_USE_PERFEVENT
    if (perfpid != NULL)
    {
        snprintf(execpid, 19, "%d", getpid());
        setenv("LIKWID_PERF_PID", execpid, 1);
        char* perfflags = getenv("LIKWID_PERF_FLAGS");
        if (perfflags)
        {
            setenv("LIKWID_PERF_FLAGS", getenv("LIKWID_PERF_FLAGS"), 1);
        }
    }
#endif

    i = perfmon_init(num_cpus, threads2Cpu);
    if (i<0)
    {
        //fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n");
        return;
    }

    bEventStr = bfromcstr(eventStr);
    eventStrings = bsplit(bEventStr,'|');
    numberOfGroups = eventStrings->qty;
    groups = malloc(numberOfGroups * sizeof(int));
    if (!groups)
    {
        fprintf(stderr,"Cannot allocate space for group handling.\n");
        bstrListDestroy(eventStrings);
        exit(EXIT_FAILURE);
    }
    for (i=0; i<eventStrings->qty; i++)
    {
        groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i]));
    }
    bstrListDestroy(eventStrings);
    bdestroy(bEventStr);

    for (i=0; i<num_cpus; i++)
    {
        hashTable_initThread(threads2Cpu[i]);
        for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++)
        {
            groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE;
            groupSet->groups[groups[0]].state = STATE_START;
        }
    }
    if (setinit)
    {
        likwid_init = 1;
    }
    threads2Pthread[registered_cpus] = pthread_self();
    registered_cpus++;

    groupSet->activeGroup = 0;

    perfmon_setupCounters(groupSet->activeGroup);
    perfmon_startCounters();
}
Пример #5
0
int main(int argn, char** argc)
{
    int err, i ,j;
    int numCPUs = 0;
    int gid;
    DATATYPE *a,*b,*c,*d;
    TimeData timer;
    double triad_time, copy_time, scale_time, stream_time;
    char estr[1024];
    double result, scalar = 3.0;
    char* ptr;

    if (argn != 3)
    {
        printf("Usage: %s <cpustr> <events>\n", argc[0]);
        return 1;
    }

    strcpy(estr, argc[2]);

    allocate_vector(&a, SIZE);
    allocate_vector(&b, SIZE);
    allocate_vector(&c, SIZE);
    allocate_vector(&d, SIZE);

    err = topology_init();
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's topology module\n");
        return 1;
    }
    CpuTopology_t topo = get_cpuTopology();
    affinity_init();
    int* cpus = (int*)malloc(topo->numHWThreads * sizeof(int));
    if (!cpus)
        return 1;
    numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads);
    omp_set_num_threads(numCPUs);
    err = perfmon_init(numCPUs, cpus);
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's performance monitoring module\n");
        affinity_finalize();
        topology_finalize();
        return 1;
    }
    gid = perfmon_addEventSet(estr);
    if (gid < 0)
    {
        printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr);
        perfmon_finalize();
        affinity_finalize();
        topology_finalize();
        return 1;
    }

    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        perfmon_finalize();
        affinity_finalize();
        topology_finalize();
        return 1;
    }

#ifdef _OPENMP
    printf(HLINE);
#pragma omp parallel
    {
#pragma omp master
    {
        printf ("Number of Threads requested = %i\n",omp_get_num_threads());
    }
    likwid_pinThread(cpus[omp_get_thread_num()]);
    printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
    }
#endif

#pragma omp parallel for
    for (int j=0; j<SIZE; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
        d[j] = 1.0;
    }

    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("copy");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                c[j] = a[j];
            }
            LIKWID_MARKER_STOP("copy");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    copy_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }

    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        copy_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));

    ptr = strtok(estr,",");
    j = 0;
    while (ptr != NULL)
    {
        for (i = 0;i < numCPUs; i++)
        {
            result = perfmon_getResult(gid, j, cpus[i]);
            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
        }
        ptr = strtok(NULL,",");
        j++;
    }
    strcpy(estr, argc[2]);
    perfmon_setupCounters(gid);

    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("scale");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                b[j] = scalar*c[j];
            }
            LIKWID_MARKER_STOP("scale");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    scale_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }

    printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        copy_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));

    ptr = strtok(estr,",");
    j = 0;
    while (ptr != NULL)
    {
        for (i = 0;i < numCPUs; i++)
        {
            result = perfmon_getResult(gid, j, cpus[i]);
            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
        }
        ptr = strtok(NULL,",");
        j++;
    }
    strcpy(estr, argc[2]);
    perfmon_setupCounters(gid);
    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("stream");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                c[j] = a[j] + b[j];
            }
            LIKWID_MARKER_STOP("stream");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    stream_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }

    printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        copy_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));

    ptr = strtok(estr,",");
    j = 0;
    while (ptr != NULL)
    {
        for (i = 0;i < numCPUs; i++)
        {
            result = perfmon_getResult(gid, j, cpus[i]);
            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
        }
        ptr = strtok(NULL,",");
        j++;
    }
    strcpy(estr, argc[2]);
    perfmon_setupCounters(gid);
    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {

            LIKWID_MARKER_START("triad");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                a[j] = b[j] +  c[j] * scalar;
            }
            LIKWID_MARKER_STOP("triad");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    triad_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }



    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(4*SIZE*sizeof(DATATYPE)),
                        triad_time,
                        1E-6*((4*SIZE*sizeof(DATATYPE))/triad_time));
    ptr = strtok(estr,",");
    j = 0;
    while (ptr != NULL)
    {
        for (i = 0;i < numCPUs; i++)
        {
            result = perfmon_getResult(gid, j, cpus[i]);
            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
        }
        ptr = strtok(NULL,",");
        j++;
    }

    perfmon_finalize();
    affinity_finalize();
    topology_finalize();
    return 0;
}