/*
 * Queries the CPU clock without calling timer_init() in this test.
 * Returns 1 when timer_getCpuClock() reports 0, and 0 for any other value.
 */
int test_timercpuclock_noinit()
{
    const uint64_t reported = timer_getCpuClock();

    return (reported == 0) ? 1 : 0;
}
/*
 * Lua binding: pushes the value of timer_getCpuClock() as a number onto the
 * Lua stack. The timer module is initialized on demand and the
 * timer_isInitialized flag is set so that this happens only once.
 * Returns 1, the number of values pushed.
 */
static int lua_likwid_getCpuClock(lua_State* L)
{
    if (!timer_isInitialized)
    {
        timer_init();
        timer_isInitialized = 1;
    }

    lua_pushnumber(L, timer_getCpuClock());

    return 1;
}
/*
 * Initializes the timer module and checks that timer_getCpuClock() reports a
 * non-zero value.
 * Returns 1 on success and 0 on failure. timer_finalize() is called on both
 * paths so a failing run does not leave the timer module initialized.
 */
int test_timercpuclock()
{
    timer_init();
    uint64_t cyc = timer_getCpuClock();
    if (cyc == 0)
    {
        /* Route the failure through the cleanup label instead of returning directly. */
        goto fail;
    }
    timer_finalize();
    return 1;
fail:
    timer_finalize();
    return 0;
}
/*
 * Initializes the timer module, reads the CPU clock once and finalizes the
 * module again.
 * Returns 1 when the clock value is non-zero, otherwise 0.
 */
int test_timerinit()
{
    int passed;

    timer_init();
    const uint64_t reported = timer_getCpuClock();
    passed = (reported != 0) ? 1 : 0;
    timer_finalize();

    return passed;
}
/*
 * Starts and immediately stops a timer, then validates both readouts:
 *   - timer_print() must be neither zero nor greater than 1,
 *   - timer_printCycles() must be non-zero and not greater than
 *     timer_getCpuClock().
 * Returns 1 when all checks pass, otherwise 0. The timer module is finalized
 * in either case.
 */
int test_timerprint_stop()
{
    TimerData probe;
    int verdict = 0;

    timer_init();
    timer_reset(&probe);
    timer_start(&probe);
    timer_stop(&probe);

    double elapsed = timer_print(&probe);
    if (!(elapsed > 1) && !(elapsed == 0))
    {
        /* The cycle readout is only taken once the time readout was accepted. */
        uint64_t ticks = timer_printCycles(&probe);
        if ((ticks != 0) && !(ticks > timer_getCpuClock()))
        {
            verdict = 1;
        }
    }

    timer_finalize();
    return verdict;
}
int main(int argc, char** argv) { int iter = 100; uint32_t i; uint32_t j; int globalNumberOfThreads = 0; int optPrintDomains = 0; int c; ThreadUserData myData; bstring testcase = bfromcstr("none"); uint32_t numberOfWorkgroups = 0; int tmp = 0; double time; const TestCase* test = NULL; Workgroup* currentWorkgroup = NULL; Workgroup* groups = NULL; cpuid_init(); numa_init(); affinity_init(); /* Handling of command line options */ if (argc == 1) { HELP_MSG; } while ((c = getopt (argc, argv, "g:w:t:i:l:aphv")) != -1) { switch (c) { case 'h': HELP_MSG; exit (EXIT_SUCCESS); case 'v': VERSION_MSG; exit (EXIT_SUCCESS); case 'a': printf(TESTS"\n"); exit (EXIT_SUCCESS); case 'w': tmp--; if (tmp == -1) { fprintf (stderr, "More workgroups configured than allocated!\n"); return EXIT_FAILURE; } if (!test) { fprintf (stderr, "You need to specify a test case first!\n"); return EXIT_FAILURE; } testcase = bfromcstr(optarg); currentWorkgroup = groups+tmp; /*FIXME*/ bstr_to_workgroup(currentWorkgroup, testcase, test->type, test->streams); bdestroy(testcase); for (i=0; i< test->streams; i++) { if (currentWorkgroup->streams[i].offset%test->stride) { fprintf (stderr, "Stream %d: offset is not a multiple of stride!\n",i); return EXIT_FAILURE; } allocator_allocateVector(&(currentWorkgroup->streams[i].ptr), PAGE_ALIGNMENT, currentWorkgroup->size, currentWorkgroup->streams[i].offset, test->type, currentWorkgroup->streams[i].domain); } break; case 'i': iter = atoi(optarg); break; case 'l': testcase = bfromcstr(optarg); for (i=0; i<NUMKERNELS; i++) { if (biseqcstr(testcase, kernels[i].name)) { test = kernels+i; break; } } if (biseqcstr(testcase,"none")) { fprintf (stderr, "Unknown test case %s\n",optarg); return EXIT_FAILURE; } else { printf("Name: %s\n",test->name); printf("Number of streams: %d\n",test->streams); printf("Loop stride: %d\n",test->stride); printf("Flops: %d\n",test->flops); printf("Bytes: %d\n",test->bytes); switch (test->type) { case SINGLE: printf("Data Type: Single precision 
float\n"); break; case DOUBLE: printf("Data Type: Double precision float\n"); break; } } bdestroy(testcase); exit (EXIT_SUCCESS); break; case 'p': optPrintDomains = 1; break; case 'g': numberOfWorkgroups = atoi(optarg); allocator_init(numberOfWorkgroups * MAX_STREAMS); tmp = numberOfWorkgroups; groups = (Workgroup*) malloc(numberOfWorkgroups*sizeof(Workgroup)); break; case 't': testcase = bfromcstr(optarg); for (i=0; i<NUMKERNELS; i++) { if (biseqcstr(testcase, kernels[i].name)) { test = kernels+i; break; } } if (biseqcstr(testcase,"none")) { fprintf (stderr, "Unknown test case %s\n",optarg); return EXIT_FAILURE; } bdestroy(testcase); break; case '?': if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); return EXIT_FAILURE; default: HELP_MSG; } } if (optPrintDomains) { affinity_printDomains(); exit (EXIT_SUCCESS); } timer_init(); /* :WARNING:05/04/2010 08:58:05 AM:jt: At the moment the thread * module only allows equally sized thread groups*/ for (i=0; i<numberOfWorkgroups; i++) { globalNumberOfThreads += groups[i].numberOfThreads; } threads_init(globalNumberOfThreads); threads_createGroups(numberOfWorkgroups); /* we configure global barriers only */ barrier_init(1); barrier_registerGroup(globalNumberOfThreads); #ifdef PERFMON printf("Using likwid\n"); likwid_markerInit(); #endif /* initialize data structures for threads */ for (i=0; i<numberOfWorkgroups; i++) { myData.iter = iter; myData.size = groups[i].size; myData.test = test; myData.numberOfThreads = groups[i].numberOfThreads; myData.processors = (int*) malloc(myData.numberOfThreads * sizeof(int)); myData.streams = (void**) malloc(test->streams * sizeof(void*)); for (j=0; j<groups[i].numberOfThreads; j++) { myData.processors[j] = groups[i].processorIds[j]; } for (j=0; j< test->streams; j++) { myData.streams[j] = groups[i].streams[j].ptr; } threads_registerDataGroup(i, &myData, copyThreadData); free(myData.processors); 
free(myData.streams); } printf(HLINE); printf("LIKWID MICRO BENCHMARK\n"); printf("Test: %s\n",test->name); printf(HLINE); printf("Using %d work groups\n",numberOfWorkgroups); printf("Using %d threads\n",globalNumberOfThreads); printf(HLINE); threads_create(runTest); threads_destroy(); allocator_finalize(); time = (double) threads_data[0].cycles / (double) timer_getCpuClock(); printf("Cycles: %llu \n", LLU_CAST threads_data[0].cycles); printf("Iterations: %llu \n", LLU_CAST iter); printf("Size: %d \n", currentWorkgroup->size ); printf("Vectorlength: %d \n", threads_data[0].data.size); printf("Time: %e sec\n", time); printf("MFlops/s:\t%.2f\n", 1.0E-06 * ((double) numberOfWorkgroups * iter * currentWorkgroup->size * test->flops/ time)); printf("MByte/s:\t%.2f\n", 1.0E-06 * ( (double) numberOfWorkgroups * iter * currentWorkgroup->size * test->bytes/ time)); printf("Cycles per update:\t%f\n", ((double) threads_data[0].cycles / (double) (iter * threads_data[0].data.size))); switch ( test->type ) { case SINGLE: printf("Cycles per cacheline:\t%f\n", (16.0 * (double) threads_data[0].cycles / (double) (iter * threads_data[0].data.size))); break; case DOUBLE: printf("Cycles per cacheline:\t%f\n", (8.0 * (double) threads_data[0].cycles / (double) (iter * threads_data[0].data.size))); break; } printf(HLINE); #ifdef PERFMON likwid_markerClose(); #endif return EXIT_SUCCESS; }
int main (int argc, char** argv) { int socket_fd = -1; int optInfo = 0; int optClock = 0; int optStethoscope = 0; int optSockets = 0; double runtime; int hasDRAM = 0; int c; bstring argString; bstring eventString = bfromcstr("CLOCK"); int numSockets=1; int numThreads=0; int threadsSockets[MAX_NUM_NODES*2]; int threads[MAX_NUM_THREADS]; threadsSockets[0] = 0; if (argc == 1) { HELP_MSG; exit (EXIT_SUCCESS); } while ((c = getopt (argc, argv, "+c:hiM:ps:v")) != -1) { switch (c) { case 'c': CHECK_OPTION_STRING; numSockets = bstr_to_cpuset_physical((uint32_t*) threadsSockets, argString); bdestroy(argString); optSockets = 1; break; case 'h': HELP_MSG; exit (EXIT_SUCCESS); case 'i': optInfo = 1; break; case 'M': /* Set MSR Access mode */ CHECK_OPTION_STRING; accessClient_setaccessmode(str2int((char*) argString->data)); bdestroy(argString); break; case 'p': optClock = 1; break; case 's': CHECK_OPTION_STRING; optStethoscope = str2int((char*) argString->data); bdestroy(argString); break; case 'v': VERSION_MSG; exit (EXIT_SUCCESS); case '?': if (optopt == 's' || optopt == 'M' || optopt == 'c') { HELP_MSG; } else if (isprint (optopt)) { fprintf (stderr, "Unknown option `-%c'.\n", optopt); } else { fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); } exit( EXIT_FAILURE); default: HELP_MSG; exit (EXIT_SUCCESS); } } if (!lock_check()) { fprintf(stderr,"Access to performance counters is locked.\n"); exit(EXIT_FAILURE); } if (optClock && optind == argc) { fprintf(stderr,"Commandline option -p requires an executable.\n"); exit(EXIT_FAILURE); } if (optSockets && !optStethoscope && optind == argc) { fprintf(stderr,"Commandline option -c requires an executable if not used in combination with -s.\n"); exit(EXIT_FAILURE); } if (cpuid_init() == EXIT_FAILURE) { fprintf(stderr, "CPU not supported\n"); exit(EXIT_FAILURE); } if (numSockets > cpuid_topology.numSockets) { fprintf(stderr, "System has only %d sockets but %d are given on commandline\n", cpuid_topology.numSockets, 
numSockets); exit(EXIT_FAILURE); } numa_init(); /* consider NUMA node as power unit for the moment */ accessClient_init(&socket_fd); msr_init(socket_fd); timer_init(); /* check for supported processors */ if ((cpuid_info.model == SANDYBRIDGE_EP) || (cpuid_info.model == SANDYBRIDGE) || (cpuid_info.model == IVYBRIDGE) || (cpuid_info.model == IVYBRIDGE_EP) || (cpuid_info.model == HASWELL) || (cpuid_info.model == NEHALEM_BLOOMFIELD) || (cpuid_info.model == NEHALEM_LYNNFIELD) || (cpuid_info.model == NEHALEM_WESTMERE)) { power_init(numa_info.nodes[0].processors[0]); } else { fprintf (stderr, "Query Turbo Mode only supported on Intel Nehalem/Westmere/SandyBridge/IvyBridge/Haswell processors!\n"); exit(EXIT_FAILURE); } double clock = (double) timer_getCpuClock(); printf(HLINE); printf("CPU name:\t%s \n",cpuid_info.name); printf("CPU clock:\t%3.2f GHz \n", (float) clock * 1.E-09); printf(HLINE); if (optInfo) { if (power_info.turbo.numSteps != 0) { printf("Base clock:\t%.2f MHz \n", power_info.baseFrequency ); printf("Minimal clock:\t%.2f MHz \n", power_info.minFrequency ); printf("Turbo Boost Steps:\n"); for (int i=0; i < power_info.turbo.numSteps; i++ ) { printf("C%d %.2f MHz \n",i+1, power_info.turbo.steps[i] ); } } printf(HLINE); } if (cpuid_info.model == SANDYBRIDGE_EP) { hasDRAM = 1; } else if ((cpuid_info.model != SANDYBRIDGE) && (cpuid_info.model != SANDYBRIDGE_EP) && (cpuid_info.model != IVYBRIDGE) && (cpuid_info.model != IVYBRIDGE_EP) && (cpuid_info.model != HASWELL)) { fprintf (stderr, "RAPL not supported on this processor!\n"); exit(EXIT_FAILURE); } if (optInfo) { printf("Thermal Spec Power: %g Watts \n", power_info.tdp ); printf("Minimum Power: %g Watts \n", power_info.minPower); printf("Maximum Power: %g Watts \n", power_info.maxPower); printf("Maximum Time Window: %g micro sec \n", power_info.maxTimeWindow); printf(HLINE); exit(EXIT_SUCCESS); } if (optClock) { affinity_init(); argString = bformat("S%u:0-%u", threadsSockets[0], 
cpuid_topology.numCoresPerSocket-1); for (int i=1; i<numSockets; i++) { bstring tExpr = bformat("@S%u:0-%u", threadsSockets[i], cpuid_topology.numCoresPerSocket-1); bconcat(argString, tExpr); } numThreads = bstr_to_cpuset(threads, argString); bdestroy(argString); perfmon_init(numThreads, threads, stdout); perfmon_setupEventSet(eventString, NULL); } { PowerData pDataPkg[MAX_NUM_NODES*2]; PowerData pDataDram[MAX_NUM_NODES*2]; printf("Measure on sockets: %d", threadsSockets[0]); for (int i=1; i<numSockets; i++) { printf(", %d", threadsSockets[i]); } printf("\n"); if (optStethoscope) { if (optClock) { perfmon_startCounters(); } else { for (int i=0; i<numSockets; i++) { int cpuId = numa_info.nodes[threadsSockets[i]].processors[0]; if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM); power_start(pDataPkg+i, cpuId, PKG); } } sleep(optStethoscope); if (optClock) { perfmon_stopCounters(); perfmon_printCounterResults(); perfmon_finalize(); } else { for (int i=0; i<numSockets; i++) { int cpuId = numa_info.nodes[threadsSockets[i]].processors[0]; power_stop(pDataPkg+i, cpuId, PKG); if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM); } } runtime = (double) optStethoscope; } else { TimerData time; argv += optind; bstring exeString = bfromcstr(argv[0]); for (int i=1; i<(argc-optind); i++) { bconchar(exeString, ' '); bcatcstr(exeString, argv[i]); } printf("%s\n",bdata(exeString)); if (optClock) { perfmon_startCounters(); } else { for (int i=0; i<numSockets; i++) { int cpuId = numa_info.nodes[threadsSockets[i]].processors[0]; if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM); power_start(pDataPkg+i, cpuId, PKG); } timer_start(&time); } if (system(bdata(exeString)) == EOF) { fprintf(stderr, "Failed to execute %s!\n", bdata(exeString)); exit(EXIT_FAILURE); } if (optClock) { perfmon_stopCounters(); perfmon_printCounterResults(); perfmon_finalize(); } else { timer_stop(&time); for (int i=0; i<numSockets; i++) { int cpuId = numa_info.nodes[threadsSockets[i]].processors[0]; 
power_stop(pDataPkg+i, cpuId, PKG); if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM); } runtime = timer_print(&time); } } if (!optClock) { printf("Runtime: %g second \n",runtime); printf(HLINE); for (int i=0; i<numSockets; i++) { printf("Socket %d\n",threadsSockets[i]); printf("Domain: PKG \n"); printf("Energy consumed: %g Joules \n", power_printEnergy(pDataPkg+i)); printf("Power consumed: %g Watts \n", power_printEnergy(pDataPkg+i) / runtime ); if (hasDRAM) { printf("Domain: DRAM \n"); printf("Energy consumed: %g Joules \n", power_printEnergy(pDataDram+i)); printf("Power consumed: %g Watts \n", power_printEnergy(pDataDram+i) / runtime ); } printf("\n"); } } } #if 0 if ( cpuid_hasFeature(TM2) ) { thermal_init(0); printf("Current core temperatures:\n"); for (uint32_t i = 0; i < cpuid_topology.numCoresPerSocket; i++ ) { printf("Core %d: %u C\n", numa_info.nodes[socketId].processors[i], thermal_read(numa_info.nodes[socketId].processors[i])); } } #endif msr_finalize(); return EXIT_SUCCESS; }
int main(int argc, char** argv) { uint64_t iter = 100; uint32_t i; uint32_t j; int globalNumberOfThreads = 0; int optPrintDomains = 0; int c; ThreadUserData myData; bstring testcase = bfromcstr("none"); uint64_t numberOfWorkgroups = 0; int tmp = 0; double time; double cycPerUp = 0.0; const TestCase* test = NULL; uint64_t realSize = 0; uint64_t realIter = 0; uint64_t maxCycles = 0; uint64_t minCycles = UINT64_MAX; uint64_t cyclesClock = 0; uint64_t demandIter = 0; TimerData itertime; Workgroup* currentWorkgroup = NULL; Workgroup* groups = NULL; uint32_t min_runtime = 1; /* 1s */ bstring HLINE = bfromcstr(""); binsertch(HLINE, 0, 80, '-'); binsertch(HLINE, 80, 1, '\n'); int (*ownprintf)(const char *format, ...); ownprintf = &printf; /* Handling of command line options */ if (argc == 1) { HELP_MSG; exit(EXIT_SUCCESS); } while ((c = getopt (argc, argv, "w:t:s:l:aphvi:")) != -1) { switch (c) { case 'h': HELP_MSG; exit (EXIT_SUCCESS); case 'v': VERSION_MSG; exit (EXIT_SUCCESS); case 'a': ownprintf(TESTS"\n"); exit (EXIT_SUCCESS); case 'w': numberOfWorkgroups++; break; case 's': min_runtime = atoi(optarg); break; case 'i': demandIter = strtoul(optarg, NULL, 10); if (demandIter <= 0) { fprintf (stderr, "Error: Iterations must be greater than 0\n"); return EXIT_FAILURE; } break; case 'l': bdestroy(testcase); testcase = bfromcstr(optarg); for (i=0; i<NUMKERNELS; i++) { if (biseqcstr(testcase, kernels[i].name)) { test = kernels+i; break; } } if (test == NULL) { fprintf (stderr, "Error: Unknown test case %s\n",optarg); return EXIT_FAILURE; } else { ownprintf("Name: %s\n",test->name); ownprintf("Number of streams: %d\n",test->streams); ownprintf("Loop stride: %d\n",test->stride); ownprintf("Flops: %d\n",test->flops); ownprintf("Bytes: %d\n",test->bytes); switch (test->type) { case INT: ownprintf("Data Type: Integer\n"); break; case SINGLE: ownprintf("Data Type: Single precision float\n"); break; case DOUBLE: ownprintf("Data Type: Double precision float\n"); break; } if 
(test->loads >= 0) { ownprintf("Load Ops: %d\n",test->loads); } if (test->stores >= 0) { ownprintf("Store Ops: %d\n",test->stores); } if (test->branches >= 0) { ownprintf("Branches: %d\n",test->branches); } if (test->instr_const >= 0) { ownprintf("Constant instructions: %d\n",test->instr_const); } if (test->instr_loop >= 0) { ownprintf("Loop instructions: %d\n",test->instr_loop); } } bdestroy(testcase); exit (EXIT_SUCCESS); break; case 'p': optPrintDomains = 1; break; case 'g': numberOfWorkgroups = LLU_CAST atol(optarg); tmp = numberOfWorkgroups; break; case 't': bdestroy(testcase); testcase = bfromcstr(optarg); for (i=0; i<NUMKERNELS; i++) { if (biseqcstr(testcase, kernels[i].name)) { test = kernels+i; break; } } if (test == NULL) { fprintf (stderr, "Error: Unknown test case %s\n",optarg); return EXIT_FAILURE; } bdestroy(testcase); break; case '?': if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); return EXIT_FAILURE; default: HELP_MSG; } } if ((numberOfWorkgroups == 0) && (!optPrintDomains)) { fprintf(stderr, "Error: At least one workgroup (-w) must be set on commandline\n"); exit (EXIT_FAILURE); } if (topology_init() != EXIT_SUCCESS) { fprintf(stderr, "Error: Unsupported processor!\n"); exit(EXIT_FAILURE); } if ((test == NULL) && (!optPrintDomains)) { fprintf(stderr, "Unknown test case. 
Please check likwid-bench -a for available tests\n"); fprintf(stderr, "and select one using the -t commandline option\n"); exit(EXIT_FAILURE); } numa_init(); affinity_init(); timer_init(); if (optPrintDomains) { bdestroy(testcase); AffinityDomains_t affinity = get_affinityDomains(); ownprintf("Number of Domains %d\n",affinity->numberOfAffinityDomains); for (i=0; i < affinity->numberOfAffinityDomains; i++ ) { ownprintf("Domain %d:\n",i); ownprintf("\tTag %s:",bdata(affinity->domains[i].tag)); for ( uint32_t j=0; j < affinity->domains[i].numberOfProcessors; j++ ) { ownprintf(" %d",affinity->domains[i].processorList[j]); } ownprintf("\n"); } exit (EXIT_SUCCESS); } allocator_init(numberOfWorkgroups * MAX_STREAMS); groups = (Workgroup*) malloc(numberOfWorkgroups*sizeof(Workgroup)); tmp = 0; optind = 0; while ((c = getopt (argc, argv, "w:t:s:l:i:aphv")) != -1) { switch (c) { case 'w': currentWorkgroup = groups+tmp; bstring groupstr = bfromcstr(optarg); i = bstr_to_workgroup(currentWorkgroup, groupstr, test->type, test->streams); bdestroy(groupstr); if (i == 0) { for (i=0; i< test->streams; i++) { if (currentWorkgroup->streams[i].offset%test->stride) { fprintf (stderr, "Error: Stream %d: offset is not a multiple of stride!\n",i); return EXIT_FAILURE; } allocator_allocateVector(&(currentWorkgroup->streams[i].ptr), PAGE_ALIGNMENT, currentWorkgroup->size, currentWorkgroup->streams[i].offset, test->type, currentWorkgroup->streams[i].domain); } tmp++; } else { exit(EXIT_FAILURE); } break; default: continue; break; } } /* :WARNING:05/04/2010 08:58:05 AM:jt: At the moment the thread * module only allows equally sized thread groups*/ for (i=0; i<numberOfWorkgroups; i++) { globalNumberOfThreads += groups[i].numberOfThreads; } ownprintf(bdata(HLINE)); ownprintf("LIKWID MICRO BENCHMARK\n"); ownprintf("Test: %s\n",test->name); ownprintf(bdata(HLINE)); ownprintf("Using %" PRIu64 " work groups\n",numberOfWorkgroups); ownprintf("Using %d threads\n",globalNumberOfThreads); 
ownprintf(bdata(HLINE)); threads_init(globalNumberOfThreads); threads_createGroups(numberOfWorkgroups); /* we configure global barriers only */ barrier_init(1); barrier_registerGroup(globalNumberOfThreads); cyclesClock = timer_getCycleClock(); #ifdef LIKWID_PERFMON if (getenv("LIKWID_FILEPATH") != NULL) { ownprintf("Using Likwid Marker API\n"); } LIKWID_MARKER_INIT; ownprintf(bdata(HLINE)); #endif /* initialize data structures for threads */ for (i=0; i<numberOfWorkgroups; i++) { myData.iter = iter; if (demandIter > 0) { myData.iter = demandIter; } myData.min_runtime = min_runtime; myData.size = groups[i].size; myData.test = test; myData.cycles = 0; myData.numberOfThreads = groups[i].numberOfThreads; myData.processors = (int*) malloc(myData.numberOfThreads * sizeof(int)); myData.streams = (void**) malloc(test->streams * sizeof(void*)); for (j=0; j<groups[i].numberOfThreads; j++) { myData.processors[j] = groups[i].processorIds[j]; } for (j=0; j< test->streams; j++) { myData.streams[j] = groups[i].streams[j].ptr; } threads_registerDataGroup(i, &myData, copyThreadData); free(myData.processors); free(myData.streams); } if (demandIter == 0) { getIterSingle((void*) &threads_data[0]); for (i=0; i<numberOfWorkgroups; i++) { iter = threads_updateIterations(i, demandIter); } } #ifdef DEBUG_LIKWID else { ownprintf("Using manually selected iterations per thread\n"); } #endif threads_create(runTest); threads_join(); for (int i=0; i<globalNumberOfThreads; i++) { realSize += threads_data[i].data.size; realIter += threads_data[i].data.iter; if (threads_data[i].cycles > maxCycles) { maxCycles = threads_data[i].cycles; } if (threads_data[i].cycles < minCycles) { minCycles = threads_data[i].cycles; } } time = (double) maxCycles / (double) cyclesClock; ownprintf(bdata(HLINE)); ownprintf("Cycles:\t\t\t%" PRIu64 "\n", maxCycles); ownprintf("CPU Clock:\t\t%" PRIu64 "\n", timer_getCpuClock()); ownprintf("Cycle Clock:\t\t%" PRIu64 "\n", cyclesClock); ownprintf("Time:\t\t\t%e sec\n", time); 
ownprintf("Iterations:\t\t%" PRIu64 "\n", realIter); ownprintf("Iterations per thread:\t%" PRIu64 "\n",threads_data[0].data.iter); ownprintf("Inner loop executions:\t%.0f\n", ((double)realSize)/((double)test->stride)); ownprintf("Size:\t\t\t%" PRIu64 "\n", realSize*test->bytes ); ownprintf("Size per thread:\t%" PRIu64 "\n", threads_data[0].data.size*test->bytes); ownprintf("Number of Flops:\t%" PRIu64 "\n", (threads_data[0].data.iter * realSize * test->flops)); ownprintf("MFlops/s:\t\t%.2f\n", 1.0E-06 * ((double) threads_data[0].data.iter * realSize * test->flops/ time)); ownprintf("Data volume (Byte):\t%llu\n", LLU_CAST (threads_data[0].data.iter * realSize * test->bytes)); ownprintf("MByte/s:\t\t%.2f\n", 1.0E-06 * ( (double) threads_data[0].data.iter * realSize * test->bytes/ time)); cycPerUp = ((double) maxCycles / (double) (threads_data[0].data.iter * realSize)); ownprintf("Cycles per update:\t%f\n", cycPerUp); switch ( test->type ) { case INT: case SINGLE: ownprintf("Cycles per cacheline:\t%f\n", (16.0 * cycPerUp)); break; case DOUBLE: ownprintf("Cycles per cacheline:\t%f\n", (8.0 * cycPerUp)); break; } ownprintf("Loads per update:\t%ld\n", test->loads ); ownprintf("Stores per update:\t%ld\n", test->stores ); if ((test->loads > 0) && (test->stores > 0)) { ownprintf("Load/store ratio:\t%.2f\n", ((double)test->loads)/((double)test->stores) ); } if ((test->instr_loop > 0) && (test->instr_const > 0)) { ownprintf("Instructions:\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->instr_loop*threads_data[0].data.iter + test->instr_const ); } if (test->uops > 0) { ownprintf("UOPs:\t\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->uops*threads_data[0].data.iter); } ownprintf(bdata(HLINE)); threads_destroy(numberOfWorkgroups, test->streams); allocator_finalize(); workgroups_destroy(&groups, numberOfWorkgroups, test->streams); #ifdef LIKWID_PERFMON if (getenv("LIKWID_FILEPATH") != NULL) { ownprintf("Writing Likwid Marker API results to 
file %s\n", getenv("LIKWID_FILEPATH")); } LIKWID_MARKER_CLOSE; #endif bdestroy(HLINE); return EXIT_SUCCESS; }