C++ (Cpp) LIKWID_MARKER_STOP 예제들

예제 #1

0

파일 보기

파일: testmarker-omp.c 프로젝트: ErmannoTufano/likwid

/*
 * LIKWID Marker API smoke test with OpenMP.
 *
 * Every thread first measures a two second sleep in region "time", then the
 * ten iterations of a work-shared loop are measured individually in region
 * "plain". Inside that region each thread repeats a vector triad
 * (threadId + 1) times. The triad results are accumulated into the global
 * `sum`, which is printed at the end.
 *
 * Returns 0.
 */
int main(void)
{
    double alpha = 3.14;

    /* Initialize. a[0] is computed as 1.0/0.0, but a[] is overwritten by the
     * triad below before any element is read. */
    for (int i=0; i<SIZE; i++)
    {
        a[i] = 1.0/(double) i;
        b[i] = 1.0;
        c[i] = (double) i;
    }

    LIKWID_MARKER_INIT;

#pragma omp parallel
    {
        LIKWID_MARKER_THREADINIT;

        LIKWID_MARKER_START("time");
        sleep(2);
        LIKWID_MARKER_STOP("time");

        int threadId = omp_get_thread_num();
        /* Per-thread accumulator: updating the shared `sum` directly from
         * every thread would be a data race. */
        double local_sum = 0.0;
        /****************************************************/
#pragma omp for
        for (int j = 0; j < 10; j++)
        {
            LIKWID_MARKER_START("plain");
            for (int k = 0; k < (threadId+1); k++)
            {
                for (int i = 0; i < SIZE; i++)
                {
                    /* NOTE(review): all threads still store the same value
                     * into the shared a[i]; the value is kept in a local so
                     * it is not re-read from the contended array. */
                    double value = b[i] + alpha * c[i];
                    a[i] = value;
                    local_sum += value;
                }
            }
            LIKWID_MARKER_STOP("plain");
        }

        /* Merge the partial result outside of the measured region */
#pragma omp atomic
        sum += local_sum;

        printf("Flops performed plain: %g\n",(double)10*SIZE*3);
        /****************************************************/
    }


    LIKWID_MARKER_CLOSE;
    printf( "OK, dofp result = %e\n", sum);
    return 0;
}

예제 #2

0

파일 보기

파일: C-markerAPI.c 프로젝트: mike0042/ohpc

/*
 * Minimal LIKWID Marker API example.
 *
 * Initializes the Marker API, registers region "example" in every thread and
 * then, once per configured performance group, lets every thread measure a
 * sleep of SLEEPTIME seconds, print the results it obtained for the region
 * and switch to the next group.
 *
 * Returns 0.
 */
int main(int argc, char* argv[])
{
    int g;
    // Init Marker API in serial region once in the beginning
    LIKWID_MARKER_INIT;
    #pragma omp parallel
    {
        // Each thread must add itself to the Marker API, therefore must be
        // in parallel region
        LIKWID_MARKER_THREADINIT;
        // Optional. Register region name
        LIKWID_MARKER_REGISTER("example");
    }

    // perfmon_getNumberOfGroups is not part of the MarkerAPI,
    // it belongs to the normal LIKWID API. But the MarkerAPI
    // has no function to get the number of configured groups.
    for (g=0;g < perfmon_getNumberOfGroups(); g++)
    {
        #pragma omp parallel
        {
            // Result storage is declared inside the parallel region so that
            // every thread owns a private copy; sharing it would let all
            // threads write the same variables concurrently. The values are
            // initialized so they are defined even if the marker macros
            // expand to nothing.
            int nevents = 10;
            double events[10] = {0.0};
            double elapsed = 0.0;
            int count = 0;

            printf("Thread %d sleeps now for %d seconds\n", omp_get_thread_num(), SLEEPTIME);
            // Start measurements inside a parallel region
            LIKWID_MARKER_START("example");
            // Insert your code here.
            // Often contains an OpenMP for pragma. Regions can be nested.
            sleep(SLEEPTIME);
            // Stop measurements inside a parallel region
            LIKWID_MARKER_STOP("example");
            printf("Thread %d wakes up again\n", omp_get_thread_num());
            // If you need the performance data inside your application, use
            LIKWID_MARKER_GET("example", &nevents, events, &elapsed, &count);
            // where events is an array of doubles with nevents entries,
            // time is a double* and count an int*.
            printf("Region example measures %d events, total measurement time is %f\n", nevents, elapsed);
            printf("The region was called %d times\n", count);
            for (int i = 0; i < nevents; i++)
            {
                printf("Event %d: %f\n", i, events[i]);
            }
            // If multiple groups given, you can switch to the next group
            LIKWID_MARKER_SWITCH;
        }
    }

    // Close Marker API and write results to file for further evaluation done
    // by likwid-perfctr
    LIKWID_MARKER_CLOSE;
    return 0;
}

예제 #3

0

파일 보기

파일: testmarker-cnt.c 프로젝트: RRZE-HPC/likwid

/*
 * LIKWID Marker API test with per-iteration region names.
 *
 * Every thread measures two regions, "plain-1" and "plain-2". Inside region
 * "plain-<counter>" a thread repeats a vector triad (counter * threadId)
 * times, so thread 0 performs no triad work. Barriers surround the start and
 * stop calls. The triad results are accumulated into the global `sum`.
 *
 * Returns 0.
 */
int main(int argc, char* argv[])
{
    double alpha = 3.14;

    /* Initialize. a[0] is computed as 1.0/0.0, but a[] is overwritten by the
     * triad below before any element is read. */
    for (int i=0; i<SIZE; i++)
    {
        a[i] = 1.0/(double) i;
        b[i] = 1.0;
        c[i] = (double) i;
    }
    LIKWID_MARKER_INIT;

    printf("Main running on core %d\n", likwid_getProcessorId());


/****************************************************/
#pragma omp parallel
    {
        LIKWID_MARKER_THREADINIT;
        /* Region name buffer; lives on the thread's stack so no allocation
         * can fail and nothing has to be freed. */
        char label[40];
        int threadId = omp_get_thread_num();
        /* Per-thread accumulator: updating the shared `sum` directly from
         * every thread would be a data race. */
        double local_sum = 0.0;
        printf("Thread running on core %d\n", likwid_getProcessorId());

        for (int counter=1; counter< 3; counter++)
        {
            snprintf(label, sizeof label, "plain-%d", counter);
#pragma omp barrier
            LIKWID_MARKER_START(label);
            /* The loop indices are declared here, inside the parallel
             * region, so each thread iterates with its own copies. */
            for (int j = 0; j < counter * threadId; j++)
            {
                for (int i = 0; i < SIZE; i++)
                {
                    /* NOTE(review): all threads still store the same value
                     * into the shared a[i]; the value is kept in a local so
                     * it is not re-read from the contended array. */
                    double value = b[i] + alpha * c[i];
                    a[i] = value;
                    local_sum += value;
                }
            }
#pragma omp barrier
            LIKWID_MARKER_STOP(label);
            printf("Flops performed thread %d region %s: %g\n",threadId, label,(double)counter*threadId*SIZE*3);
        }

        /* Merge the partial result outside of the measured regions */
#pragma omp atomic
        sum += local_sum;
    }
/****************************************************/


    LIKWID_MARKER_CLOSE;
    printf( "OK, dofp result = %e\n", sum);
    return 0;
}

예제 #4

0

파일 보기

파일: stream_cilk.c 프로젝트: ProgramFan/likwid

/*
 * Copy and triad bandwidth benchmark using Cilk, instrumented with the
 * LIKWID Marker API.
 *
 * Allocates four vectors of SIZE doubles, runs ITER repetitions of a copy
 * kernel (c = a) and of a triad kernel (a = b + c * d), each wrapped in a
 * marker region that is started and stopped once per worker, and prints the
 * bandwidth computed from the averaged run time of each kernel.
 *
 * Returns 0.
 */
int main(){
    int i, k;
    int nworkers, totalworkers;
    double *a, *b, *c, *d;
    double sums[2000];
    TimeData timer;
    double triad_time, copy_time;

    nprocessors = sysconf(_SC_NPROCESSORS_CONF);

    nworkers = cilk_spawn get_nworkers();
    totalworkers = cilk_spawn get_totalworkers();
    /* The spawned calls may still be running; their results must not be
     * read before the sync. */
    cilk_sync;

    /* Never write past the end of sums[] if more workers are reported than
     * the array has slots. */
    for (i=0; i<nworkers && i<(int)(sizeof(sums)/sizeof(sums[0])); i++)
    {
        sums[i] = 0;
    }

    LIKWID_MARKER_INIT;

    cilk_spawn allocate_vector(&a, SIZE);
    cilk_spawn allocate_vector(&b, SIZE);
    cilk_spawn allocate_vector(&c, SIZE);
    cilk_spawn allocate_vector(&d, SIZE);
    cilk_sync;

    for (i=0; i<SIZE; i++) {
        a[i] = 1.0;
        b[i] = 2.0;
        c[i] = 0.0;
        d[i] = 1.0;
    }

    /* Copy kernel: c = a */
    time_start(&timer);
    for (k=0; k<ITER; k++)
    {
        for (i=0;i<nworkers;i++)
        {
            cilk_spawn LIKWID_MARKER_START("copy");
        }
        cilk_sync;
        cilk_for(i=0;i<SIZE;i++)
        {
            c[i] = a[i];
        }
        for (i=0;i<nworkers;i++)
        {
            cilk_spawn LIKWID_MARKER_STOP("copy");
        }
        cilk_sync;
    }
    time_stop(&timer);
    copy_time = time_print(&timer)/(double)ITER;

    /* Triad kernel: a = b + c * d */
    time_start(&timer);
    for (k=0; k<ITER; k++)
    {
        for (i=0;i<nworkers;i++)
        {
            cilk_spawn LIKWID_MARKER_START("triad");
        }
        cilk_sync;
        cilk_for(i=0;i<SIZE;i++)
        {
            a[i] = b[i] +  c[i] * d[i];
        }
        for (i=0;i<nworkers;i++)
        {
            cilk_spawn LIKWID_MARKER_STOP("triad");
        }
        cilk_sync;
    }
    time_stop(&timer);
    triad_time = time_print(&timer)/(double)ITER;
    
    /* Copy touches two vectors, the four-operand triad touches four */
    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(double)),
                        copy_time,
                        1E-6*((2*SIZE*sizeof(double))/copy_time));
    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(4*SIZE*sizeof(double)),
                        triad_time,
                        1E-6*((4*SIZE*sizeof(double))/triad_time));

    printf("Main PID %d\n",getpid());
    for (i=0;i<nworkers;i++)
    {
        cilk_spawn show_thread();
    }
    cilk_sync;

    LIKWID_MARKER_CLOSE;
    return 0;
}

예제 #5

0

파일 보기

파일: stream-API.c 프로젝트: ProgramFan/likwid

/*
 * Print the measured value of every event of group `gid` for every
 * monitored CPU.
 *
 * gid     - group handle that is passed to perfmon_getResult()
 * events  - comma-separated event list; used only to label the output
 * cpus    - CPU ids in the order in which they were passed to perfmon_init()
 * numCPUs - number of entries in cpus
 */
static void print_event_results(int gid, const char *events,
                                const int *cpus, int numCPUs)
{
    char list[1024];
    char *tok;
    int event_idx = 0;

    /* strtok() modifies its input, so tokenize a private copy */
    snprintf(list, sizeof list, "%s", events);
    for (tok = strtok(list, ","); tok != NULL; tok = strtok(NULL, ","))
    {
        for (int i = 0; i < numCPUs; i++)
        {
            /* NOTE(review): the last argument is passed as the position of
             * the CPU in the list given to perfmon_init(), not the CPU id
             * itself - confirm against the LIKWID version in use. */
            double result = perfmon_getResult(gid, event_idx, i);
            printf("Measurement result for event set %s at CPU %d: %f\n", tok, cpus[i], result);
        }
        event_idx++;
    }
}

/*
 * STREAM-like benchmark (copy, scale, stream/add, triad) that drives the
 * LIKWID perfmon API directly.
 *
 * Usage: <program> <cpustr> <events>
 *   cpustr - CPU list string handed to cpustr_to_cpulist()
 *   events - comma-separated event string handed to perfmon_addEventSet()
 *
 * For every kernel the counters are started, the kernel is run ITER times on
 * all threads, the counters are stopped and the bandwidth together with the
 * per-CPU event results is printed.
 *
 * Returns 0 on success and 1 on any usage or initialization/measurement
 * error.
 */
int main(int argn, char** argc)
{
    int err;
    int rc = 1;     /* stays 1 until every stage has succeeded */
    int len;
    int numCPUs = 0;
    int gid;
    int *cpus = NULL;
    DATATYPE *a,*b,*c,*d;
    TimeData timer;
    double triad_time, copy_time, scale_time, stream_time;
    char estr[1024];
    double scalar = 3.0;
    CpuTopology_t topo;

    if (argn != 3)
    {
        printf("Usage: %s <cpustr> <events>\n", argc[0]);
        return 1;
    }

    /* Bounded copy: the event string comes from the command line */
    len = snprintf(estr, sizeof estr, "%s", argc[2]);
    if (len < 0 || (size_t)len >= sizeof estr)
    {
        printf("Event string is too long (at most %d characters)\n", (int)sizeof estr - 1);
        return 1;
    }

    allocate_vector(&a, SIZE);
    allocate_vector(&b, SIZE);
    allocate_vector(&c, SIZE);
    allocate_vector(&d, SIZE);

    err = topology_init();
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's topology module\n");
        return 1;
    }
    topo = get_cpuTopology();
    affinity_init();
    cpus = malloc(topo->numHWThreads * sizeof *cpus);
    if (!cpus)
    {
        printf("Failed to allocate the CPU list\n");
        goto out_affinity;
    }
    numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads);
    if (numCPUs <= 0)
    {
        printf("Failed to parse CPU string %s\n", argc[1]);
        goto out_affinity;
    }
    omp_set_num_threads(numCPUs);
    err = perfmon_init(numCPUs, cpus);
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's performance monitoring module\n");
        goto out_affinity;
    }
    gid = perfmon_addEventSet(estr);
    if (gid < 0)
    {
        printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr);
        goto out_perfmon;
    }

    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        goto out_perfmon;
    }

#ifdef _OPENMP
    printf(HLINE);
#pragma omp parallel
    {
#pragma omp master
        {
            printf ("Number of Threads requested = %i\n",omp_get_num_threads());
        }
        /* Pin thread n to the n-th CPU of the requested list */
        likwid_pinThread(cpus[omp_get_thread_num()]);
        printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
    }
#endif

#pragma omp parallel for
    for (int j=0; j<SIZE; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
        d[j] = 1.0;
    }

    /* ---------------- copy: c = a ---------------- */
    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        goto out_perfmon;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("copy");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                c[j] = a[j];
            }
            LIKWID_MARKER_STOP("copy");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    copy_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        goto out_perfmon;
    }

    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        copy_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));
    print_event_results(gid, estr, cpus, numCPUs);

    /* ---------------- scale: b = scalar * c ---------------- */
    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        goto out_perfmon;
    }
    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        goto out_perfmon;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("scale");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                b[j] = scalar*c[j];
            }
            LIKWID_MARKER_STOP("scale");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    scale_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        goto out_perfmon;
    }

    /* Report the time of this kernel, not the copy time */
    printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        scale_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/scale_time));
    print_event_results(gid, estr, cpus, numCPUs);

    /* ---------------- stream: c = a + b ---------------- */
    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        goto out_perfmon;
    }
    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        goto out_perfmon;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("stream");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                c[j] = a[j] + b[j];
            }
            LIKWID_MARKER_STOP("stream");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    stream_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        goto out_perfmon;
    }

    /* The kernel touches three vectors (a, b, c) */
    printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(3*SIZE*sizeof(DATATYPE)),
                        stream_time,
                        1E-6*((3*SIZE*sizeof(DATATYPE))/stream_time));
    print_event_results(gid, estr, cpus, numCPUs);

    /* ---------------- triad: a = b + c * scalar ---------------- */
    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        goto out_perfmon;
    }
    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        goto out_perfmon;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("triad");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                a[j] = b[j] +  c[j] * scalar;
            }
            LIKWID_MARKER_STOP("triad");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    triad_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        goto out_perfmon;
    }

    /* With a scalar factor the kernel touches three vectors (a, b, c) */
    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(3*SIZE*sizeof(DATATYPE)),
                        triad_time,
                        1E-6*((3*SIZE*sizeof(DATATYPE))/triad_time));
    print_event_results(gid, estr, cpus, numCPUs);

    rc = 0;

    /* Shared tear-down: modules are finalized in reverse order of their
     * initialization on the success path and on every error path. */
out_perfmon:
    perfmon_finalize();
out_affinity:
    affinity_finalize();
    topology_finalize();
    free(cpus);
    return rc;
}

예제 #6

0

파일 보기

파일: stream.c 프로젝트: chris-wood/cpu-profile

/*
 * STREAM benchmark driver instrumented with LIKWID marker regions.
 *
 * Prints the configuration, initializes the three global vectors, estimates
 * the timer granularity, runs the copy/scale/add/triad kernels NTIMES times
 * each inside their marker regions and finally prints a per-kernel summary
 * and runs the result check.
 *
 * Returns 0.
 */
int
main()
{
    int			tick_us, checktick();
    int			word_bytes;
    register int	idx, rep;
    double		factor, probe_start, probe_us, start;
    double		elapsed[4][NTIMES];

    /* --- SETUP --- report precision and problem size --- */

    printf(HLINE);
    printf("STREAM version $Revision: 5.8 $\n");
    printf(HLINE);
    word_bytes = sizeof(double);
    printf("This system uses %d bytes per DOUBLE PRECISION word.\n",
           word_bytes);

    printf(HLINE);
    printf("Array size = %d, Offset = %d\n" , N, OFFSET);
    printf("Total memory required = %.1f MB.\n",
           (3.0 * word_bytes) * ( (double) N / 1048576.0));
    printf("Each test is run %d times, but only\n", NTIMES);
    printf("the *best* time for each is used.\n");

#ifdef LIKWID_PERFMON
    printf("Using likwid\n");
#endif

    LIKWID_MARKER_INIT;

#ifdef _OPENMP
    printf(HLINE);
#pragma omp parallel
    {
        LIKWID_MARKER_THREADINIT;
#pragma omp master
        {
            /* rep is only borrowed here as scratch for the thread count */
            rep = omp_get_num_threads();
            printf ("Number of Threads requested = %i\n",rep);
        }

        printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),threadGetProcessorId());
    }
#endif

    /* Serial initialization of the vectors, measured as region "init" */
    LIKWID_MARKER_START("init");
    for (idx = 0; idx < N; idx++)
    {
        a[idx] = 1.0;
        b[idx] = 2.0;
        c[idx] = 0.0;
    }
    LIKWID_MARKER_STOP("init");

    printf(HLINE);

    /* Clock granularity; anything below one microsecond is treated as one */
    tick_us = checktick();
    if (tick_us < 1)
    {
        printf("Your clock granularity appears to be "
               "less than one microsecond.\n");
        tick_us = 1;
    }
    else
    {
        printf("Your clock granularity/precision appears to be "
               "%d microseconds.\n", tick_us);
    }

    /* Time one pass over a[] to estimate the duration of a single test */
    probe_start = mysecond();
#pragma omp parallel for
    for (idx = 0; idx < N; idx++)
        a[idx] = 2.0E0 * a[idx];
    probe_us = 1.0E6 * (mysecond() - probe_start);

    printf("Each test below will take on the order"
           " of %d microseconds.\n", (int) probe_us  );
    printf("   (= %d clock ticks)\n", (int) (probe_us/tick_us) );
    printf("Increase the size of the arrays if this shows that\n");
    printf("you are not getting at least 20 clock ticks per test.\n");

    printf(HLINE);

    printf("WARNING -- The above is only a rough guideline.\n");
    printf("For best results, please be sure you know the\n");
    printf("precision of your system timer.\n");
    printf(HLINE);

    /* --- MAIN LOOP --- every kernel is timed NTIMES times --- */

    factor = 3.0;
    for (rep = 0; rep < NTIMES; rep++)
    {
        /* copy: c = a */
        start = mysecond();
#pragma omp parallel
        {
            LIKWID_MARKER_START("copy");
#pragma omp for
            for (idx = 0; idx < N; idx++)
                c[idx] = a[idx];
            LIKWID_MARKER_STOP("copy");
        }
        elapsed[0][rep] = mysecond() - start;

        /* scale: b = factor * c */
        start = mysecond();
#pragma omp parallel
        {
            LIKWID_MARKER_START("scale");
#pragma omp for
            for (idx = 0; idx < N; idx++)
                b[idx] = factor*c[idx];
            LIKWID_MARKER_STOP("scale");
        }
        elapsed[1][rep] = mysecond() - start;

        /* add: c = a + b */
        start = mysecond();
#pragma omp parallel
        {
            LIKWID_MARKER_START("add");
#pragma omp for
            for (idx = 0; idx < N; idx++)
                c[idx] = a[idx]+b[idx];
            LIKWID_MARKER_STOP("add");
        }
        elapsed[2][rep] = mysecond() - start;

        /* triad: a = b + factor * c */
        start = mysecond();
#pragma omp parallel
        {
            LIKWID_MARKER_START("triad");
#pragma omp for
            for (idx = 0; idx < N; idx++)
                a[idx] = b[idx]+factor*c[idx];
            LIKWID_MARKER_STOP("triad");
        }
        elapsed[3][rep] = mysecond() - start;
    }

    /* --- SUMMARY --- the first repetition is left out of the statistics --- */

    for (rep = 1; rep < NTIMES; rep++)
    {
        for (idx = 0; idx < 4; idx++)
        {
            avgtime[idx] += elapsed[idx][rep];
            mintime[idx] = MIN(mintime[idx], elapsed[idx][rep]);
            maxtime[idx] = MAX(maxtime[idx], elapsed[idx][rep]);
        }
    }

    printf("Function      Rate (MB/s)   Avg time     Min time     Max time\n");
    for (idx = 0; idx < 4; idx++)
    {
        avgtime[idx] = avgtime[idx]/(double)(NTIMES-1);

        /* The rate is derived from the best (minimum) time */
        printf("%s%11.4f  %11.4f  %11.4f  %11.4f\n", label[idx],
               1.0E-06 * bytes[idx]/mintime[idx],
               avgtime[idx],
               mintime[idx],
               maxtime[idx]);
    }
    printf(HLINE);

    /* --- Check Results --- */
    checkSTREAMresults();
    printf(HLINE);

    LIKWID_MARKER_CLOSE;
    return 0;
}

예제 #7

0

파일 보기

파일: heat-2d.c 프로젝트: apc-llc/heat-2d

/*
 * 2D heat equation stencil (Jacobi style, two time planes in A[2][..][..]).
 *
 * Reads the number of timesteps T from stdin, fills plane 0 of A with
 * pseudo-random values (fixed seed), runs T stencil sweeps over the interior
 * N x N points inside an OpenACC data region and, depending on the TIME and
 * VERIFY build switches, prints timing and checksum information.
 *
 * Returns 0 on success and 1 if no valid timestep count could be read.
 */
int main(int argc, char * argv[]) {
    long int t, i, j;
    const int BASE = 1024;

    // for timekeeping
    int ts_return = -1;
    struct timeval start, end, result;
    double tdiff = 0.0;
    
    int T = 0;

    printf("Please enter number of timesteps = \n");
    // A failed read would leave T unset and a negative T would make the
    // A[T%2] accesses of the verification code index out of bounds.
    if (scanf("%d", &T) != 1 || T < 0) {
        fprintf(stderr, "Invalid number of timesteps\n");
        return 1;
    }

    // T is an int and must be printed with %d; the point count is converted
    // explicitly so that it matches %ld.
    printf("Number of points = %ld\t|Number of timesteps = %d\t", (long)(N*N), T);

    /* Initialization */
    srand(42); // seed with a constant value to verify results

    for (i = 0; i < N+2; i++) {
        for (j = 0; j < N+2; j++) {
            A[0][i][j] = 1.0 * (rand() % BASE);
        }
    }

    // NOTE(review): no LIKWID_MARKER_INIT / LIKWID_MARKER_CLOSE is visible in
    // this function - confirm that the Marker API is set up and closed
    // elsewhere, otherwise the region below is presumably not recorded.
#ifdef USE_LIKWID
#pragma omp parallel
{
LIKWID_MARKER_START("Compute_omp");
}
#endif

    #pragma acc data create(A[0:2][0:N+2][0:N+2])
    {
        #pragma acc update device(A[0:2][0:N+2][0:N+2])

#ifdef TIME
    gettimeofday(&start, 0);
#endif

#pragma scop
    for (t = 0; t < T; t++) {

        // Plane t%2 is read, plane (t+1)%2 is written
        #pragma acc kernels loop independent present(A[0:2][0:N+2][0:N+2])
        for (i = 1; i < N+1; i++) {
            #pragma acc loop independent
            for (j = 1; j < N+1; j++) {
                A[(t+1)%2][i][j] =   0.125 * (A[t%2][i+1][j] - 2.0 * A[t%2][i][j] + A[t%2][i-1][j])
                                 + 0.125 * (A[t%2][i][j+1] - 2.0 * A[t%2][i][j] + A[t%2][i][j-1])
                                 + A[t%2][i][j];
            }
        }
    }
#pragma endscop

#ifdef TIME
    gettimeofday(&end, 0);

    ts_return = timeval_subtract(&result, &end, &start);
    tdiff = (double)(result.tv_sec + result.tv_usec * 1.0e-6);

    printf("|Time taken =  %7.5lfms\t", tdiff * 1.0e3);
    printf("|MFLOPS =  %f\t", ((((double)NUM_FP_OPS * N *N *  T) / tdiff) / 1000000L));
#endif

        #pragma acc update host(A[0:2][0:N+2][0:N+2])

	} // acc data create

#ifdef USE_LIKWID
#pragma omp parallel
{
LIKWID_MARKER_STOP("Compute_omp");
}
#endif


#ifdef VERIFY
    // After T sweeps the most recent result lives in plane T%2
    for (i = 1; i < N+1; i++) {
        for (j = 1; j < N+1; j++) {
            total+= A[T%2][i][j] ;
        }
    }
    printf("|sum: %e\t", total);
    for (i = 1; i < N+1; i++) {
        for (j = 1; j < N+1; j++) {
            sum_err_sqr += (A[T%2][i][j] - (total/N))*(A[T%2][i][j] - (total/N));
        }
    }
    printf("|rms(A) = %7.2f\t", sqrt(sum_err_sqr));
    // Byte-wise checksum over the first N bytes after offset 1 of each row
    for (i = 1; i < N+1; i++) {
        for (j = 1; j < N+1; j++) {
            chtotal += ((char *)A[T%2][i])[j];
        }
    }
    printf("|sum(rep(A)) = %d\n", chtotal);
#endif
    return 0;
}