Exemple #1
0
/**
 * Entry point of the allocator test driver.
 *
 * Expects exactly one command-line argument, a test number between 1 and 6,
 * and runs the matching testN routine through runTime().
 *
 * The allocator is initialised only after the argument has been validated,
 * so every allocator_init() is paired with allocator_finalize(). Previously
 * the usage and invalid-option exits returned with the allocator still
 * initialised.
 *
 * @param argc  number of command-line arguments
 * @param argv  argument vector; argv[1] is the test number
 * @return always 0, including on usage errors (kept for existing callers)
 */
int main( int argc, char *argv[ ] ){

	int testNumber = 0;

	if( argc != 2 ){
		printf("Usage: %s | 1 | 2 | 3 | 4 | 5 | 6 |\n", argv[ 0 ] );
		return 0;
	}

	/* atoi() yields 0 for non-numeric input, which the range check rejects. */
	testNumber = atoi(argv[1]);

	if ( ( testNumber < 1 ) || ( testNumber > 6 )  ) {
		printf("\nError.\nOption %d is an invalid token!.\n", testNumber );
		printf( "Please choose a test number between 1 and 6 \n\n" );
		return 0;
	}

	allocator_init();

	switch ( testNumber ) {
	case 1: runTime( test1 ); break;
	case 2: runTime( test2 ); break;
	case 3: runTime( test3 ); break;
	case 4: runTime( test4 ); break;
	case 5: runTime( test5 ); break;
	case 6: runTime( test6 ); break;
	default: break; /* unreachable: range validated above */
	}

	allocator_finalize();

	return 0;
}
Exemple #2
0
Fichier : Oauth.c Projet : bomma/io
/*
 * Initialise every member of a signing context, in this order:
 *   - the embedded allocator mAllocator (second argument 256;
 *     NOTE(review): presumably an initial capacity or block size, confirm
 *     against allocator_init),
 *   - the signatureParams vector, with sizeof(struct signature_param) as
 *     its element size,
 *   - the signatureParamsBuf and signatureSeed strings.
 *
 * self must point to a valid struct signctx; it is dereferenced unchecked.
 */
static void signctx_init(struct signctx *self)
{
	allocator_init(&self->mAllocator, 256);
	vector_init(&self->signatureParams, sizeof(struct signature_param));
	string_init(&self->signatureParamsBuf);
	string_init(&self->signatureSeed);
}
Exemple #3
0
/**
 * Allocate a compiler instance and set up its allocator and lexer.
 *
 * @param input     source text handed to lexer_create()
 * @param filename  stored by pointer (not copied) in the new compiler, so it
 *                  must stay valid for as long as the compiler is used
 * @return the newly allocated compiler, or NULL if the allocation failed;
 *         the caller owns the returned object
 */
struct compiler *compiler_create(const char* input, const char *filename)
{
	struct compiler *result = malloc(sizeof *result);

	/* Previously a failed malloc() was dereferenced immediately. */
	if (result == NULL)
		return NULL;

	result->filename = filename;
	result->err_count = 0;
	result->current_block = 0;

	allocator_init(&result->allocator);

	lexer_create(result, input);

	return result;
}
Exemple #4
0
/**
 * Run a randomised allocate/deallocate workload and report the result of
 * average_frag().
 *
 * Seeds rand() with `seed`, initialises the allocator with 10240, then
 * performs `requests` rounds, each allocating a random size in [100, 999]
 * using `alg` and releasing it straight away.
 *
 * The original loop never advanced its counter, so it never terminated for
 * requests > 0; the counter is now incremented once per round.
 *
 * @param alg       allocation algorithm selector passed to allocate()
 * @param seed      seed for the pseudo-random size sequence
 * @param requests  number of allocate/deallocate rounds (<= 0 runs none)
 * @return value of average_frag() after the workload
 */
double frageval(int alg, int seed, int requests){
  srand(seed);

  allocator_init(10240);

  for (int r = 0; r < requests; r++) {
    int s = (rand() % 900) + 100;

    /* NOTE(review): the result of allocate() is passed to deallocate()
     * unchecked, as before; confirm deallocate() tolerates NULL. */
    void *p = allocate(alg, s);

    deallocate(p);
  }

  return average_frag();
}
/** @fn DataBuffer_t::DataBuffer_t(size_t allocSize )
  * Constructor.
  * Allocates a zero-initialised buffer of allocSize bytes: with calloc() when
  * DVP_USE_ION is not defined, otherwise through allocator_calloc() as a
  * one-row 1D buffer of type ALLOCATOR_MEMORY_TYPE_TILED_1D_UNCACHED.
  *
  * After construction, on every path:
  *   - pData is the buffer, or NULL if the allocation failed;
  *   - nBuffSize is allocSize on success and 0 on failure;
  *   - bytesWritten is 0.
  * Previously nBuffSize (calloc failure) and pData/nBuffSize/bytesWritten
  * (allocator_init failure) were left unassigned by this constructor.
  *
  * @param allocSize required size in bytes.
*/
DataBuffer_t::DataBuffer_t(size_t allocSize )
{
#ifndef DVP_USE_ION
    pData = calloc(1, allocSize);
    if ( pData )
        nBuffSize = allocSize;
    else
        nBuffSize = 0;
    bytesWritten = 0;
#else // DVP_USE_ION

    mutex_init(&mBuffLock);

    // Start from the "no buffer" state so that every failure path below
    // leaves the object well-defined.
    pData = NULL;
    nBuffSize = 0;
    bytesWritten = 0;

    pAllocator = allocator_init();
    if( pAllocator )
    {
        // Describe the buffer as allocSize x 1 with 1 byte per element.
        dims[0].img.bpp = 1;
        dims[0].img.width = allocSize;
        dims[0].img.height = 1;
        dims[0].dim.x = allocSize;
        dims[0].dim.y = 1;
        dims[0].dim.z = 1;

        memset((void*)handles, 0, sizeof(handles));

        if( false_e == allocator_calloc( pAllocator, ALLOCATOR_MEMORY_TYPE_TILED_1D_UNCACHED, 1,
                                         1, dims, &pData, handles, &strides )                           )
        {
            pData = NULL;
        }

        if ( pData )
            nBuffSize = allocSize;
    }
#endif // DVP_USE_ION
}
Exemple #6
0
/*
 * Micro-benchmark comparing malloc()/free() with al_malloc()/al_free().
 *
 * Each phase repeats LOOP_TIMES times: allocate ALLOC_TIMES ints, then free
 * them all. The elapsed clock() ticks of each phase are printed to stdout.
 *
 * clock_t is an unspecified arithmetic type, so the tick difference is cast
 * to unsigned long to match the "%lu" conversion instead of being passed
 * through the variadic call as-is.
 *
 * Returns 0.
 */
int
main(int argc, char* argv[])
{
  int* arr[ALLOC_TIMES];
  int i, counter;
  clock_t beg, end;

  (void)argc;
  (void)argv;

  allocator_init();

  /* Phase 1: C library allocator. */
  counter = 0;
  beg = clock();
  while (counter++ < LOOP_TIMES) {
    for (i = 0; i < ALLOC_TIMES; ++i)
      arr[i] = (int*)malloc(sizeof(int));

    for (i = 0; i < ALLOC_TIMES; ++i)
      free(arr[i]);
  }
  end = clock();
  fprintf(stdout, "default use : %lu\n", (unsigned long)(end - beg));

  /* Phase 2: the allocator under test. */
  counter = 0;
  beg = clock();
  while (counter++ < LOOP_TIMES) {
    for (i = 0; i < ALLOC_TIMES; ++i)
      arr[i] = (int*)al_malloc(sizeof(int));

    for (i = 0; i < ALLOC_TIMES; ++i)
      al_free(arr[i]);
  }
  end = clock();
  fprintf(stdout, "allocator use : %lu\n", (unsigned long)(end - beg));


  allocator_destroy();

  return 0;
}
Exemple #7
0
/*
 * One-time library initialisation.
 *
 * Processes environment variables, installs signal handlers, queries this
 * task's rank and the task count on MPI_COMM_WORLD (stored in taskid and
 * num_tasks), sets the local UUID from the rank and initialises the
 * allocator. If max_ep is 0 it stops there. Otherwise it initialises the
 * client, registers MPI types and ops with the cqueue, initialises the
 * handle and window tables, splits MPI_COMM_WORLD via EPLIB_split_comm and,
 * when std_mpi_mode is STD_MPI_MODE_IMPLICIT, allocates block_coll_request
 * with room for max_ep MPI_Request entries.
 *
 * Returns MPI_SUCCESS on every path visible here.
 */
int EPLIB_init()
{
    process_env_vars();

    init_sig_handlers();

    PMPI_Comm_rank(MPI_COMM_WORLD, &taskid);
    PMPI_Comm_size(MPI_COMM_WORLD, &num_tasks);

    set_local_uuid(taskid);

    allocator_init();

    /* With max_ep == 0 none of the client/cqueue setup below is performed. */
    if (max_ep == 0) return MPI_SUCCESS;

    /* Initialize client */
    client_init(taskid, num_tasks);

    /* Register MPI type and MPI Op before any other cqueue commands */
    cqueue_mpi_type_register();
    cqueue_mpi_op_register();

    /* Initialize communicator handles table */
    handle_init();

    /* Initialize window object table */
    window_init();

    /* Create server world and peer comm for MPI_COMM_WORLD */
    EPLIB_split_comm(MPI_COMM_WORLD, 0, taskid, MPI_COMM_WORLD);

    /* NOTE(review): the malloc() result is not checked here. */
    if (std_mpi_mode == STD_MPI_MODE_IMPLICIT)
        block_coll_request = malloc(max_ep*sizeof(MPI_Request));

    return MPI_SUCCESS;
}
/*
 * Fragmentation evaluation driver.
 *
 * Command line: frag-eval [algorithm] [seed] [int requests]
 *
 * Seeds rand(), initialises the allocator with SIZE and the chosen policy,
 * then performs `requests` rounds. Each round allocates a random size in
 * [100, 999] and, if the allocation succeeded, releases it immediately.
 * Finally prints the value returned by average_frag().
 *
 * Exits with -1 on a wrong argument count or when allocator_init() returns
 * non-zero; otherwise returns 0.
 */
int main(int argc, char *argv[]) {
	if (argc != 4) {
		printf("usage: frag-eval [algorithm] [seed] [int requests]\n");
		exit(-1);
	}

	int policy = atoi(argv[1]);
	unsigned int seed = atoi(argv[2]);
	int requests = atoi(argv[3]);

#ifdef DEBUG
	printf("policy: %d, seed: %u, requests: %d\n", policy, seed, requests);
#endif /* DEBUG */

	srand(seed);

	if (allocator_init(SIZE, policy)) {
		printf("init failed");
		exit(-1);
	}

#ifdef DEBUG
	printf("successful init\n");
#endif /* DEBUG */

	for (int round = 0; round < requests; round++) {
		/* size of this round's request */
		int size = rand() % 900 + 100;
		printf("#################### Allocating size: %d#######################\n", size);

		void *block = allocate(policy, size);
		if (block != NULL) {
			deallocate(block);
			printf("------------------ Deallocated size: %d------------------\n\n", size);
		} else {
			printf("\n");
		}
	}

	/* average fragmentation size */
	double frag = average_frag();
	printf("Average fragmentation size: %f\n", frag);

	return 0;
}
Exemple #9
0
int main(int argc, char** argv)
{
    int iter = 100;
    uint32_t i;
    uint32_t j;
    int globalNumberOfThreads = 0;
    int optPrintDomains = 0;
    int c;
    ThreadUserData myData;
    bstring testcase = bfromcstr("none");
    uint32_t numberOfWorkgroups = 0;
    int tmp = 0;
    double time;
    const TestCase* test = NULL;
    Workgroup* currentWorkgroup = NULL;
    Workgroup* groups = NULL;

    cpuid_init();
    numa_init();
    affinity_init();

    /* Handling of command line options */
    if (argc ==  1) { HELP_MSG; }

    while ((c = getopt (argc, argv, "g:w:t:i:l:aphv")) != -1) {
        switch (c)
        {
            case 'h':
                HELP_MSG;
                exit (EXIT_SUCCESS);    
            case 'v':
                VERSION_MSG;
                exit (EXIT_SUCCESS);    
            case 'a':
                printf(TESTS"\n");
                exit (EXIT_SUCCESS);    
            case 'w':
                tmp--;

                if (tmp == -1)
                {
                    fprintf (stderr, "More workgroups configured than allocated!\n");
                    return EXIT_FAILURE;
                }
                if (!test)
                {
                    fprintf (stderr, "You need to specify a test case first!\n");
                    return EXIT_FAILURE;
                }
                testcase = bfromcstr(optarg);
                currentWorkgroup = groups+tmp;  /*FIXME*/
                bstr_to_workgroup(currentWorkgroup, testcase, test->type, test->streams);
                bdestroy(testcase);

                for (i=0; i<  test->streams; i++)
                {
                    if (currentWorkgroup->streams[i].offset%test->stride)
                    {
                        fprintf (stderr, "Stream %d: offset is not a multiple of stride!\n",i);
                        return EXIT_FAILURE;
                    }

                    allocator_allocateVector(&(currentWorkgroup->streams[i].ptr),
                            PAGE_ALIGNMENT,
                            currentWorkgroup->size,
                            currentWorkgroup->streams[i].offset,
                            test->type,
                            currentWorkgroup->streams[i].domain);
                }

                break;
            case 'i':
                iter =  atoi(optarg);
                break;
            case 'l':
                testcase = bfromcstr(optarg);
                for (i=0; i<NUMKERNELS; i++)
                {
                    if (biseqcstr(testcase, kernels[i].name))
                    {
                        test = kernels+i;
                        break;
                    }
                }

                if (biseqcstr(testcase,"none"))
                {
                    fprintf (stderr, "Unknown test case %s\n",optarg);
                    return EXIT_FAILURE;
                }
                else
                {
                    printf("Name: %s\n",test->name);
                    printf("Number of streams: %d\n",test->streams);
                    printf("Loop stride: %d\n",test->stride);
                    printf("Flops: %d\n",test->flops);
                    printf("Bytes: %d\n",test->bytes);
                    switch (test->type)
                    {
                        case SINGLE:
                            printf("Data Type: Single precision float\n");
                            break;
                        case DOUBLE:
                            printf("Data Type: Double precision float\n");
                            break;
                    }
                }
                bdestroy(testcase);
                exit (EXIT_SUCCESS);    

                break;
            case 'p':
                optPrintDomains = 1;
                break;
            case 'g':
                numberOfWorkgroups =  atoi(optarg);
                allocator_init(numberOfWorkgroups * MAX_STREAMS);
                tmp = numberOfWorkgroups;
                groups = (Workgroup*) malloc(numberOfWorkgroups*sizeof(Workgroup));
                break;
            case 't':
                testcase = bfromcstr(optarg);

                for (i=0; i<NUMKERNELS; i++)
                {
                    if (biseqcstr(testcase, kernels[i].name))
                    {
                        test = kernels+i;
                        break;
                    }
                }

                if (biseqcstr(testcase,"none"))
                {
                    fprintf (stderr, "Unknown test case %s\n",optarg);
                    return EXIT_FAILURE;
                }
                bdestroy(testcase);
                break;
            case '?':
                if (isprint (optopt))
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                else
                    fprintf (stderr,
                            "Unknown option character `\\x%x'.\n",
                            optopt);
                return EXIT_FAILURE;
            default:
                HELP_MSG;
        }
    }


    if (optPrintDomains)
    {
        affinity_printDomains();
        exit (EXIT_SUCCESS);    
    }
    timer_init();

 /* :WARNING:05/04/2010 08:58:05 AM:jt: At the moment the thread
  * module only allows equally sized thread groups*/
    for (i=0; i<numberOfWorkgroups; i++)
    {
        globalNumberOfThreads += groups[i].numberOfThreads;
    }

    threads_init(globalNumberOfThreads);
    threads_createGroups(numberOfWorkgroups);

    /* we configure global barriers only */
    barrier_init(1);
    barrier_registerGroup(globalNumberOfThreads);

#ifdef PERFMON
    printf("Using likwid\n");
    likwid_markerInit();
#endif


    /* initialize data structures for threads */
    for (i=0; i<numberOfWorkgroups; i++)
    {
        myData.iter = iter;
        myData.size = groups[i].size;
        myData.test = test;
        myData.numberOfThreads = groups[i].numberOfThreads;
        myData.processors = (int*) malloc(myData.numberOfThreads * sizeof(int));
        myData.streams = (void**) malloc(test->streams * sizeof(void*));

        for (j=0; j<groups[i].numberOfThreads; j++)
        {
            myData.processors[j] = groups[i].processorIds[j];
        }

        for (j=0; j<  test->streams; j++)
        {
            myData.streams[j] = groups[i].streams[j].ptr;
        }

        threads_registerDataGroup(i, &myData, copyThreadData);

        free(myData.processors);
        free(myData.streams);
    }

    printf(HLINE);
    printf("LIKWID MICRO BENCHMARK\n"); 
    printf("Test: %s\n",test->name); 
    printf(HLINE);
    printf("Using %d work groups\n",numberOfWorkgroups);
    printf("Using %d threads\n",globalNumberOfThreads);
    printf(HLINE);

    threads_create(runTest); 
    threads_destroy();
    allocator_finalize();

    time = (double) threads_data[0].cycles / (double) timer_getCpuClock();
    printf("Cycles: %llu \n", LLU_CAST threads_data[0].cycles);
    printf("Iterations: %llu \n", LLU_CAST iter);
    printf("Size: %d \n",  currentWorkgroup->size );
    printf("Vectorlength: %d \n", threads_data[0].data.size);
    printf("Time: %e sec\n", time);
    printf("MFlops/s:\t%.2f\n",
            1.0E-06 * ((double) numberOfWorkgroups * iter * currentWorkgroup->size *  test->flops/  time));
    printf("MByte/s:\t%.2f\n",
            1.0E-06 * ( (double) numberOfWorkgroups * iter * currentWorkgroup->size *  test->bytes/ time));
    printf("Cycles per update:\t%f\n",
            ((double) threads_data[0].cycles / (double) (iter * threads_data[0].data.size)));

	switch ( test->type )
    {
        case SINGLE:
    printf("Cycles per cacheline:\t%f\n",
            (16.0 * (double) threads_data[0].cycles / (double) (iter * threads_data[0].data.size)));
            break;
        case DOUBLE:
    printf("Cycles per cacheline:\t%f\n",
            (8.0 * (double) threads_data[0].cycles / (double) (iter * threads_data[0].data.size)));
            break;
    }

    printf(HLINE);
#ifdef PERFMON
   likwid_markerClose();
#endif

    return EXIT_SUCCESS;
}
Exemple #10
0
/*
 * likwid-bench entry point.
 *
 * Pass 1 over the command line counts the -w workgroups and handles the
 * remaining options:
 *   -h / -v / -a   print help, version or the test list and exit
 *   -l <test>      print the properties of one test and exit
 *   -t <test>      select the test to run
 *   -s <sec>       minimal runtime (default 1)
 *   -i <n>         fixed iteration count (must be > 0)
 *   -p             print affinity domains and exit
 * After topology, NUMA, affinity and timer initialisation the allocator and
 * the workgroup array are created. Pass 2 re-runs getopt to parse every -w
 * specification and allocate its stream vectors. The threads are then set
 * up, the test is run and the results are printed.
 *
 * Fixes over the previous revision:
 *   - testcase is set to NULL after every bdestroy(). Before, a second -t, a
 *     -l after -t, or -p together with -t destroyed an already freed bstring.
 *   - A repeated -t with an unknown name is rejected instead of silently
 *     keeping the earlier selection.
 *   - The unreachable 'g' case is removed ('g' is not in the optstring).
 *   - demandIter is unsigned, so it is compared with == 0.
 *   - A failed allocation of the workgroup array is reported.
 *
 * Returns EXIT_SUCCESS after a completed run, EXIT_FAILURE on errors.
 */
int main(int argc, char** argv)
{
    uint64_t iter = 100;
    uint32_t i;
    uint32_t j;
    int globalNumberOfThreads = 0;
    int optPrintDomains = 0;
    int c;
    ThreadUserData myData;
    bstring testcase = bfromcstr("none");
    uint64_t numberOfWorkgroups = 0;
    int tmp = 0;
    double time;
    double cycPerUp = 0.0;
    const TestCase* test = NULL;
    uint64_t realSize = 0;
    uint64_t realIter = 0;
    uint64_t maxCycles = 0;
    uint64_t minCycles = UINT64_MAX;
    uint64_t cyclesClock = 0;
    uint64_t demandIter = 0;
    TimerData itertime;
    Workgroup* currentWorkgroup = NULL;
    Workgroup* groups = NULL;
    uint32_t min_runtime = 1; /* 1s */
    /* HLINE: 80 dashes followed by a newline. */
    bstring HLINE = bfromcstr("");
    binsertch(HLINE, 0, 80, '-');
    binsertch(HLINE, 80, 1, '\n');
    int (*ownprintf)(const char *format, ...);
    ownprintf = &printf;

    /* Handling of command line options */
    if (argc ==  1)
    {
        HELP_MSG;
        exit(EXIT_SUCCESS);
    }

    while ((c = getopt (argc, argv, "w:t:s:l:aphvi:")) != -1) {
        switch (c)
        {
            case 'h':
                HELP_MSG;
                exit (EXIT_SUCCESS);
            case 'v':
                VERSION_MSG;
                exit (EXIT_SUCCESS);
            case 'a':
                ownprintf(TESTS"\n");
                exit (EXIT_SUCCESS);
            case 'w':
                /* Only counted here; the specifications are parsed in the
                 * second getopt pass once the test is known. */
                numberOfWorkgroups++;
                break;
            case 's':
                min_runtime = atoi(optarg);
                break;
            case 'i':
                demandIter = strtoul(optarg, NULL, 10);
                if (demandIter == 0)
                {
                    fprintf (stderr, "Error: Iterations must be greater than 0\n");
                    return EXIT_FAILURE;
                }
                break;
            case 'l':
                bdestroy(testcase);
                testcase = bfromcstr(optarg);
                test = NULL;
                for (i=0; i<NUMKERNELS; i++)
                {
                    /* biseqcstr() yields 1 on equality; an error value must
                     * not count as a match. */
                    if (biseqcstr(testcase, kernels[i].name) == 1)
                    {
                        test = kernels+i;
                        break;
                    }
                }

                if (test == NULL)
                {
                    fprintf (stderr, "Error: Unknown test case %s\n",optarg);
                    return EXIT_FAILURE;
                }
                else
                {
                    ownprintf("Name: %s\n",test->name);
                    ownprintf("Number of streams: %d\n",test->streams);
                    ownprintf("Loop stride: %d\n",test->stride);
                    ownprintf("Flops: %d\n",test->flops);
                    ownprintf("Bytes: %d\n",test->bytes);
                    switch (test->type)
                    {
                        case INT:
                            ownprintf("Data Type: Integer\n");
                            break;
                        case SINGLE:
                            ownprintf("Data Type: Single precision float\n");
                            break;
                        case DOUBLE:
                            ownprintf("Data Type: Double precision float\n");
                            break;
                    }
                    if (test->loads >= 0)
                    {
                        ownprintf("Load Ops: %d\n",test->loads);
                    }
                    if (test->stores >= 0)
                    {
                        ownprintf("Store Ops: %d\n",test->stores);
                    }
                    if (test->branches >= 0)
                    {
                        ownprintf("Branches: %d\n",test->branches);
                    }
                    if (test->instr_const >= 0)
                    {
                        ownprintf("Constant instructions: %d\n",test->instr_const);
                    }
                    if (test->instr_loop >= 0)
                    {
                        ownprintf("Loop instructions: %d\n",test->instr_loop);
                    }
                }
                bdestroy(testcase);
                testcase = NULL;
                exit (EXIT_SUCCESS);

                break;
            case 'p':
                optPrintDomains = 1;
                break;
            case 't':
                bdestroy(testcase);
                testcase = bfromcstr(optarg);
                test = NULL;

                for (i=0; i<NUMKERNELS; i++)
                {
                    if (biseqcstr(testcase, kernels[i].name) == 1)
                    {
                        test = kernels+i;
                        break;
                    }
                }

                if (test == NULL)
                {
                    fprintf (stderr, "Error: Unknown test case %s\n",optarg);
                    return EXIT_FAILURE;
                }
                /* Clear the handle so later bdestroy() calls see NULL
                 * instead of freed memory. */
                bdestroy(testcase);
                testcase = NULL;
                break;
            case '?':
                if (isprint (optopt))
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                else
                    fprintf (stderr,
                            "Unknown option character `\\x%x'.\n",
                            optopt);
                return EXIT_FAILURE;
            default:
                HELP_MSG;
        }
    }
    if ((numberOfWorkgroups == 0) && (!optPrintDomains))
    {
        fprintf(stderr, "Error: At least one workgroup (-w) must be set on commandline\n");
        exit (EXIT_FAILURE);
    }

    if (topology_init() != EXIT_SUCCESS)
    {
        fprintf(stderr, "Error: Unsupported processor!\n");
        exit(EXIT_FAILURE);
    }

    if ((test == NULL) && (!optPrintDomains))
    {
        fprintf(stderr, "Unknown test case. Please check likwid-bench -a for available tests\n");
        fprintf(stderr, "and select one using the -t commandline option\n");
        exit(EXIT_FAILURE);
    }

    numa_init();
    affinity_init();
    timer_init();

    if (optPrintDomains)
    {
        bdestroy(testcase);
        testcase = NULL;
        AffinityDomains_t affinity = get_affinityDomains();
        ownprintf("Number of Domains %d\n",affinity->numberOfAffinityDomains);
        for (i=0; i < affinity->numberOfAffinityDomains; i++ )
        {
            ownprintf("Domain %d:\n",i);
            ownprintf("\tTag %s:",bdata(affinity->domains[i].tag));

            for ( uint32_t j=0; j < affinity->domains[i].numberOfProcessors; j++ )
            {
                ownprintf(" %d",affinity->domains[i].processorList[j]);
            }
            ownprintf("\n");
        }
        exit (EXIT_SUCCESS);
    }

    allocator_init(numberOfWorkgroups * MAX_STREAMS);
    groups = (Workgroup*) malloc(numberOfWorkgroups*sizeof(Workgroup));
    if (groups == NULL)
    {
        fprintf(stderr, "Error: Cannot allocate memory for workgroups\n");
        exit(EXIT_FAILURE);
    }
    tmp = 0;

    /* Second pass: restart getopt and parse only the -w specifications. */
    optind = 0;
    while ((c = getopt (argc, argv, "w:t:s:l:i:aphv")) != -1)
    {
        switch (c)
        {
            case 'w':
                currentWorkgroup = groups+tmp;
                bstring groupstr = bfromcstr(optarg);
                i = bstr_to_workgroup(currentWorkgroup, groupstr, test->type, test->streams);
                bdestroy(groupstr);
                if (i == 0)
                {
                    for (i=0; i<  test->streams; i++)
                    {
                        if (currentWorkgroup->streams[i].offset%test->stride)
                        {
                            fprintf (stderr, "Error: Stream %d: offset is not a multiple of stride!\n",i);
                            return EXIT_FAILURE;
                        }
                        allocator_allocateVector(&(currentWorkgroup->streams[i].ptr),
                                PAGE_ALIGNMENT,
                                currentWorkgroup->size,
                                currentWorkgroup->streams[i].offset,
                                test->type,
                                currentWorkgroup->streams[i].domain);
                    }
                    tmp++;
                }
                else
                {
                    exit(EXIT_FAILURE);
                }
                break;
            default:
                continue;
                break;
        }
    }

    /* :WARNING:05/04/2010 08:58:05 AM:jt: At the moment the thread
     * module only allows equally sized thread groups*/
    for (i=0; i<numberOfWorkgroups; i++)
    {
        globalNumberOfThreads += groups[i].numberOfThreads;
    }

    ownprintf(bdata(HLINE));
    ownprintf("LIKWID MICRO BENCHMARK\n");
    ownprintf("Test: %s\n",test->name);
    ownprintf(bdata(HLINE));
    ownprintf("Using %" PRIu64 " work groups\n",numberOfWorkgroups);
    ownprintf("Using %d threads\n",globalNumberOfThreads);
    ownprintf(bdata(HLINE));


    threads_init(globalNumberOfThreads);
    threads_createGroups(numberOfWorkgroups);

    /* we configure global barriers only */
    barrier_init(1);
    barrier_registerGroup(globalNumberOfThreads);
    cyclesClock = timer_getCycleClock();

#ifdef LIKWID_PERFMON
    if (getenv("LIKWID_FILEPATH") != NULL)
    {
        ownprintf("Using Likwid Marker API\n");
    }
    LIKWID_MARKER_INIT;
    ownprintf(bdata(HLINE));
#endif


    /* initialize data structures for threads */
    for (i=0; i<numberOfWorkgroups; i++)
    {
        myData.iter = iter;
        if (demandIter > 0)
        {
            myData.iter = demandIter;
        }
        myData.min_runtime = min_runtime;
        myData.size = groups[i].size;
        myData.test = test;
        myData.cycles = 0;
        myData.numberOfThreads = groups[i].numberOfThreads;
        myData.processors = (int*) malloc(myData.numberOfThreads * sizeof(int));
        myData.streams = (void**) malloc(test->streams * sizeof(void*));

        for (j=0; j<groups[i].numberOfThreads; j++)
        {
            myData.processors[j] = groups[i].processorIds[j];
        }

        for (j=0; j<  test->streams; j++)
        {
            myData.streams[j] = groups[i].streams[j].ptr;
        }

        /* The temporary arrays are freed right after registration, with
         * copyThreadData passed as the copy callback. */
        threads_registerDataGroup(i, &myData, copyThreadData);

        free(myData.processors);
        free(myData.streams);
    }

    /* Without -i, getIterSingle() and threads_updateIterations() are used
     * to set the iteration count. */
    if (demandIter == 0)
    {
        getIterSingle((void*) &threads_data[0]);
        for (i=0; i<numberOfWorkgroups; i++)
        {
            iter = threads_updateIterations(i, demandIter);
        }
    }
#ifdef DEBUG_LIKWID
    else
    {
        ownprintf("Using manually selected iterations per thread\n");
    }
#endif

    threads_create(runTest);
    threads_join();

    /* Accumulate sizes and iterations over all threads and track the
     * slowest and fastest thread. */
    for (int i=0; i<globalNumberOfThreads; i++)
    {
        realSize += threads_data[i].data.size;
        realIter += threads_data[i].data.iter;
        if (threads_data[i].cycles > maxCycles)
        {
            maxCycles = threads_data[i].cycles;
        }
        if (threads_data[i].cycles < minCycles)
        {
            minCycles = threads_data[i].cycles;
        }
    }



    time = (double) maxCycles / (double) cyclesClock;
    ownprintf(bdata(HLINE));
    ownprintf("Cycles:\t\t\t%" PRIu64 "\n", maxCycles);
    ownprintf("CPU Clock:\t\t%" PRIu64 "\n", timer_getCpuClock());
    ownprintf("Cycle Clock:\t\t%" PRIu64 "\n", cyclesClock);
    ownprintf("Time:\t\t\t%e sec\n", time);
    ownprintf("Iterations:\t\t%" PRIu64 "\n", realIter);
    ownprintf("Iterations per thread:\t%" PRIu64 "\n",threads_data[0].data.iter);
    ownprintf("Inner loop executions:\t%.0f\n", ((double)realSize)/((double)test->stride));
    ownprintf("Size:\t\t\t%" PRIu64 "\n",  realSize*test->bytes );
    ownprintf("Size per thread:\t%" PRIu64 "\n", threads_data[0].data.size*test->bytes);
    ownprintf("Number of Flops:\t%" PRIu64 "\n", (threads_data[0].data.iter * realSize *  test->flops));
    ownprintf("MFlops/s:\t\t%.2f\n",
            1.0E-06 * ((double) threads_data[0].data.iter * realSize *  test->flops/  time));

    ownprintf("Data volume (Byte):\t%llu\n", LLU_CAST (threads_data[0].data.iter * realSize *  test->bytes));
    ownprintf("MByte/s:\t\t%.2f\n",
            1.0E-06 * ( (double) threads_data[0].data.iter * realSize *  test->bytes/ time));

    cycPerUp = ((double) maxCycles / (double) (threads_data[0].data.iter * realSize));
    ownprintf("Cycles per update:\t%f\n", cycPerUp);

    switch ( test->type )
    {
        case INT:
        case SINGLE:
            ownprintf("Cycles per cacheline:\t%f\n", (16.0 * cycPerUp));
            break;
        case DOUBLE:
            ownprintf("Cycles per cacheline:\t%f\n", (8.0 * cycPerUp));
            break;
    }
    ownprintf("Loads per update:\t%ld\n", test->loads );
    ownprintf("Stores per update:\t%ld\n", test->stores );
    if ((test->loads > 0) && (test->stores > 0))
    {
        ownprintf("Load/store ratio:\t%.2f\n", ((double)test->loads)/((double)test->stores) );
    }
    if ((test->instr_loop > 0) && (test->instr_const > 0))
    {
        ownprintf("Instructions:\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->instr_loop*threads_data[0].data.iter + test->instr_const );
    }
    if (test->uops > 0)
    {
        ownprintf("UOPs:\t\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->uops*threads_data[0].data.iter);
    }

    ownprintf(bdata(HLINE));
    threads_destroy(numberOfWorkgroups, test->streams);
    allocator_finalize();
    workgroups_destroy(&groups, numberOfWorkgroups, test->streams);

#ifdef LIKWID_PERFMON
    if (getenv("LIKWID_FILEPATH") != NULL)
    {
        ownprintf("Writing Likwid Marker API results to file %s\n", getenv("LIKWID_FILEPATH"));
    }
    LIKWID_MARKER_CLOSE;
#endif

    bdestroy(HLINE);
    return EXIT_SUCCESS;
}