/*
 * Kernel entry point after early boot: brings up every subsystem in a fixed
 * order and finally starts either the init script or manager().
 *
 * mboot_info: physical address of the multiboot info structure handed over by
 *             the boot loader; it is translated to a kernel virtual address
 *             (KERNBASE offset) before use.
 *
 * The call order below is significant: later steps rely on earlier ones
 * (e.g. the console before any printing, physical memory before the
 * allocators), so do not reorder without checking each dependency.
 */
void kernel_init(multiboot_info_t *mboot_info)
{
	extern char __start_bss[], __stop_bss[];

	/* Zero the BSS region delimited by the linker-provided symbols. */
	memset(__start_bss, 0, __stop_bss - __start_bss);
	/* mboot_info is a physical address.  while some arches currently have the
	 * lower memory mapped, everyone should have it mapped at kernbase by now.
	 * also, it might be in 'free' memory, so once we start dynamically using
	 * memory, we may clobber it. */
	multiboot_kaddr = (struct multiboot_info*)((physaddr_t)mboot_info + KERNBASE);
	extract_multiboot_cmdline(multiboot_kaddr);

	cons_init();
	print_cpuinfo();

	printk("Boot Command Line: '%s'\n", boot_cmdline);

	exception_table_init();
	cache_init();					// Determine the system's cache properties
	pmem_init(multiboot_kaddr);
	kmem_cache_init();				// Sets up slab allocator
	kmalloc_init();
	hashtable_init();
	radix_init();
	cache_color_alloc_init();		// Inits data structs
	colored_page_alloc_init();		// Allocates colors for agnostic processes
	acpiinit();
	topology_init();
	kthread_init();					/* might need to tweak when this happens */
	vmr_init();
	file_init();
	page_check();
	idt_init();
	kernel_msg_init();
	timer_init();
	vfs_init();
	devfs_init();
	train_timing();
	kb_buf_init(&cons_buf);
	arch_init();
	block_init();
	enable_irq();
	run_linker_funcs();
	/* reset/init devtab after linker funcs 3 and 4.  these run NIC and medium
	 * pre-inits, which need to happen before devether. */
	devtabreset();
	devtabinit();

	/* Optional subsystems, selected at build time. */
#ifdef CONFIG_EXT2FS
	mount_fs(&ext2_fs_type, "/dev/ramdisk", "/mnt", 0);
#endif /* CONFIG_EXT2FS */
#ifdef CONFIG_ETH_AUDIO
	eth_audio_init();
#endif /* CONFIG_ETH_AUDIO */
	get_coreboot_info(&sysinfo);
	/* Clear the global 'booting' flag now that initialization is complete. */
	booting = 0;

	/* With CONFIG_RUN_INIT_SCRIPT, manager() is only called when
	 * run_init_script() returns non-zero; otherwise it is always called. */
#ifdef CONFIG_RUN_INIT_SCRIPT
	if (run_init_script()) {
		printk("Configured to run init script, but no script specified!\n");
		manager();
	}
#else
	manager();
#endif
}
/*
 * Lua binding: returns one table describing the affinity domains.
 *
 * Lazily initializes the topology, NUMA and affinity modules (tracked by the
 * file-level *_isInitialized flags) and caches their info pointers. Raises a
 * Lua error when no affinity information is available.
 *
 * Result layout: scalar counters (numberOfAffinityDomains, ...) plus a
 * 1-based "domains" array; every entry carries "tag", "numberOfProcessors",
 * "numberOfCores" and a 1-based "processorList" array.
 */
static int lua_likwid_getAffinityInfo(lua_State* L)
{
    int domIdx;
    int procIdx;

    /* Topology: initialize once, then make sure both cached pointers are set.
     * After this block the flag is always non-zero, so only the pointers
     * need to be tested. */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if (cpuinfo == NULL)
    {
        cpuinfo = get_cpuInfo();
    }
    if (cputopo == NULL)
    {
        cputopo = get_cpuTopology();
    }

    /* NUMA: the flag stays 0 when numa_init() reports a failure. */
    if (!numa_isInitialized && numa_init() == 0)
    {
        numa_isInitialized = 1;
        numainfo = get_numaTopology();
    }
    if (numa_isInitialized && numainfo == NULL)
    {
        numainfo = get_numaTopology();
    }

    /* Affinity domains. */
    if (!affinity_isInitialized)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    if (affinity == NULL)
    {
        affinity = get_affinityDomains();
    }
    if (affinity == NULL)
    {
        lua_pushstring(L,"Cannot initialize affinity groups");
        lua_error(L);
    }

    /* Top-level result table with the scalar counters. */
    lua_newtable(L);

    lua_pushstring(L,"numberOfAffinityDomains");
    lua_pushunsigned(L,affinity->numberOfAffinityDomains);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfSocketDomains");
    lua_pushunsigned(L,affinity->numberOfSocketDomains);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfNumaDomains");
    lua_pushunsigned(L,affinity->numberOfNumaDomains);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfProcessorsPerSocket");
    lua_pushunsigned(L,affinity->numberOfProcessorsPerSocket);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfCacheDomains");
    lua_pushunsigned(L,affinity->numberOfCacheDomains);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfCoresPerCache");
    lua_pushunsigned(L,affinity->numberOfCoresPerCache);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfProcessorsPerCache");
    lua_pushunsigned(L,affinity->numberOfProcessorsPerCache);
    lua_settable(L,-3);

    /* "domains": one sub-table per affinity domain, keyed 1..n. */
    lua_pushstring(L,"domains");
    lua_newtable(L);
    for (domIdx = 0; domIdx < affinity->numberOfAffinityDomains; domIdx++)
    {
        lua_pushunsigned(L, domIdx+1);
        lua_newtable(L);

        lua_pushstring(L,"tag");
        lua_pushstring(L,bdata(affinity->domains[domIdx].tag));
        lua_settable(L,-3);

        lua_pushstring(L,"numberOfProcessors");
        lua_pushunsigned(L,affinity->domains[domIdx].numberOfProcessors);
        lua_settable(L,-3);

        lua_pushstring(L,"numberOfCores");
        lua_pushunsigned(L,affinity->domains[domIdx].numberOfCores);
        lua_settable(L,-3);

        /* "processorList": processor IDs of this domain, keyed 1..m. */
        lua_pushstring(L,"processorList");
        lua_newtable(L);
        for (procIdx = 0; procIdx < affinity->domains[domIdx].numberOfProcessors; procIdx++)
        {
            lua_pushunsigned(L,procIdx+1);
            lua_pushunsigned(L,affinity->domains[domIdx].processorList[procIdx]);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);     /* domain.processorList = list */

        lua_settable(L,-3);     /* domains[domIdx+1] = domain */
    }
    lua_settable(L,-3);         /* result.domains = domains */

    return 1;
}
int main(int argc, char** argv) { uint64_t iter = 100; uint32_t i; uint32_t j; int globalNumberOfThreads = 0; int optPrintDomains = 0; int c; ThreadUserData myData; bstring testcase = bfromcstr("none"); uint64_t numberOfWorkgroups = 0; int tmp = 0; double time; double cycPerUp = 0.0; const TestCase* test = NULL; uint64_t realSize = 0; uint64_t realIter = 0; uint64_t maxCycles = 0; uint64_t minCycles = UINT64_MAX; uint64_t cyclesClock = 0; uint64_t demandIter = 0; TimerData itertime; Workgroup* currentWorkgroup = NULL; Workgroup* groups = NULL; uint32_t min_runtime = 1; /* 1s */ bstring HLINE = bfromcstr(""); binsertch(HLINE, 0, 80, '-'); binsertch(HLINE, 80, 1, '\n'); int (*ownprintf)(const char *format, ...); ownprintf = &printf; /* Handling of command line options */ if (argc == 1) { HELP_MSG; exit(EXIT_SUCCESS); } while ((c = getopt (argc, argv, "w:t:s:l:aphvi:")) != -1) { switch (c) { case 'h': HELP_MSG; exit (EXIT_SUCCESS); case 'v': VERSION_MSG; exit (EXIT_SUCCESS); case 'a': ownprintf(TESTS"\n"); exit (EXIT_SUCCESS); case 'w': numberOfWorkgroups++; break; case 's': min_runtime = atoi(optarg); break; case 'i': demandIter = strtoul(optarg, NULL, 10); if (demandIter <= 0) { fprintf (stderr, "Error: Iterations must be greater than 0\n"); return EXIT_FAILURE; } break; case 'l': bdestroy(testcase); testcase = bfromcstr(optarg); for (i=0; i<NUMKERNELS; i++) { if (biseqcstr(testcase, kernels[i].name)) { test = kernels+i; break; } } if (test == NULL) { fprintf (stderr, "Error: Unknown test case %s\n",optarg); return EXIT_FAILURE; } else { ownprintf("Name: %s\n",test->name); ownprintf("Number of streams: %d\n",test->streams); ownprintf("Loop stride: %d\n",test->stride); ownprintf("Flops: %d\n",test->flops); ownprintf("Bytes: %d\n",test->bytes); switch (test->type) { case INT: ownprintf("Data Type: Integer\n"); break; case SINGLE: ownprintf("Data Type: Single precision float\n"); break; case DOUBLE: ownprintf("Data Type: Double precision float\n"); break; } if 
(test->loads >= 0) { ownprintf("Load Ops: %d\n",test->loads); } if (test->stores >= 0) { ownprintf("Store Ops: %d\n",test->stores); } if (test->branches >= 0) { ownprintf("Branches: %d\n",test->branches); } if (test->instr_const >= 0) { ownprintf("Constant instructions: %d\n",test->instr_const); } if (test->instr_loop >= 0) { ownprintf("Loop instructions: %d\n",test->instr_loop); } } bdestroy(testcase); exit (EXIT_SUCCESS); break; case 'p': optPrintDomains = 1; break; case 'g': numberOfWorkgroups = LLU_CAST atol(optarg); tmp = numberOfWorkgroups; break; case 't': bdestroy(testcase); testcase = bfromcstr(optarg); for (i=0; i<NUMKERNELS; i++) { if (biseqcstr(testcase, kernels[i].name)) { test = kernels+i; break; } } if (test == NULL) { fprintf (stderr, "Error: Unknown test case %s\n",optarg); return EXIT_FAILURE; } bdestroy(testcase); break; case '?': if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); return EXIT_FAILURE; default: HELP_MSG; } } if ((numberOfWorkgroups == 0) && (!optPrintDomains)) { fprintf(stderr, "Error: At least one workgroup (-w) must be set on commandline\n"); exit (EXIT_FAILURE); } if (topology_init() != EXIT_SUCCESS) { fprintf(stderr, "Error: Unsupported processor!\n"); exit(EXIT_FAILURE); } if ((test == NULL) && (!optPrintDomains)) { fprintf(stderr, "Unknown test case. 
Please check likwid-bench -a for available tests\n"); fprintf(stderr, "and select one using the -t commandline option\n"); exit(EXIT_FAILURE); } numa_init(); affinity_init(); timer_init(); if (optPrintDomains) { bdestroy(testcase); AffinityDomains_t affinity = get_affinityDomains(); ownprintf("Number of Domains %d\n",affinity->numberOfAffinityDomains); for (i=0; i < affinity->numberOfAffinityDomains; i++ ) { ownprintf("Domain %d:\n",i); ownprintf("\tTag %s:",bdata(affinity->domains[i].tag)); for ( uint32_t j=0; j < affinity->domains[i].numberOfProcessors; j++ ) { ownprintf(" %d",affinity->domains[i].processorList[j]); } ownprintf("\n"); } exit (EXIT_SUCCESS); } allocator_init(numberOfWorkgroups * MAX_STREAMS); groups = (Workgroup*) malloc(numberOfWorkgroups*sizeof(Workgroup)); tmp = 0; optind = 0; while ((c = getopt (argc, argv, "w:t:s:l:i:aphv")) != -1) { switch (c) { case 'w': currentWorkgroup = groups+tmp; bstring groupstr = bfromcstr(optarg); i = bstr_to_workgroup(currentWorkgroup, groupstr, test->type, test->streams); bdestroy(groupstr); if (i == 0) { for (i=0; i< test->streams; i++) { if (currentWorkgroup->streams[i].offset%test->stride) { fprintf (stderr, "Error: Stream %d: offset is not a multiple of stride!\n",i); return EXIT_FAILURE; } allocator_allocateVector(&(currentWorkgroup->streams[i].ptr), PAGE_ALIGNMENT, currentWorkgroup->size, currentWorkgroup->streams[i].offset, test->type, currentWorkgroup->streams[i].domain); } tmp++; } else { exit(EXIT_FAILURE); } break; default: continue; break; } } /* :WARNING:05/04/2010 08:58:05 AM:jt: At the moment the thread * module only allows equally sized thread groups*/ for (i=0; i<numberOfWorkgroups; i++) { globalNumberOfThreads += groups[i].numberOfThreads; } ownprintf(bdata(HLINE)); ownprintf("LIKWID MICRO BENCHMARK\n"); ownprintf("Test: %s\n",test->name); ownprintf(bdata(HLINE)); ownprintf("Using %" PRIu64 " work groups\n",numberOfWorkgroups); ownprintf("Using %d threads\n",globalNumberOfThreads); 
ownprintf(bdata(HLINE)); threads_init(globalNumberOfThreads); threads_createGroups(numberOfWorkgroups); /* we configure global barriers only */ barrier_init(1); barrier_registerGroup(globalNumberOfThreads); cyclesClock = timer_getCycleClock(); #ifdef LIKWID_PERFMON if (getenv("LIKWID_FILEPATH") != NULL) { ownprintf("Using Likwid Marker API\n"); } LIKWID_MARKER_INIT; ownprintf(bdata(HLINE)); #endif /* initialize data structures for threads */ for (i=0; i<numberOfWorkgroups; i++) { myData.iter = iter; if (demandIter > 0) { myData.iter = demandIter; } myData.min_runtime = min_runtime; myData.size = groups[i].size; myData.test = test; myData.cycles = 0; myData.numberOfThreads = groups[i].numberOfThreads; myData.processors = (int*) malloc(myData.numberOfThreads * sizeof(int)); myData.streams = (void**) malloc(test->streams * sizeof(void*)); for (j=0; j<groups[i].numberOfThreads; j++) { myData.processors[j] = groups[i].processorIds[j]; } for (j=0; j< test->streams; j++) { myData.streams[j] = groups[i].streams[j].ptr; } threads_registerDataGroup(i, &myData, copyThreadData); free(myData.processors); free(myData.streams); } if (demandIter == 0) { getIterSingle((void*) &threads_data[0]); for (i=0; i<numberOfWorkgroups; i++) { iter = threads_updateIterations(i, demandIter); } } #ifdef DEBUG_LIKWID else { ownprintf("Using manually selected iterations per thread\n"); } #endif threads_create(runTest); threads_join(); for (int i=0; i<globalNumberOfThreads; i++) { realSize += threads_data[i].data.size; realIter += threads_data[i].data.iter; if (threads_data[i].cycles > maxCycles) { maxCycles = threads_data[i].cycles; } if (threads_data[i].cycles < minCycles) { minCycles = threads_data[i].cycles; } } time = (double) maxCycles / (double) cyclesClock; ownprintf(bdata(HLINE)); ownprintf("Cycles:\t\t\t%" PRIu64 "\n", maxCycles); ownprintf("CPU Clock:\t\t%" PRIu64 "\n", timer_getCpuClock()); ownprintf("Cycle Clock:\t\t%" PRIu64 "\n", cyclesClock); ownprintf("Time:\t\t\t%e sec\n", time); 
ownprintf("Iterations:\t\t%" PRIu64 "\n", realIter); ownprintf("Iterations per thread:\t%" PRIu64 "\n",threads_data[0].data.iter); ownprintf("Inner loop executions:\t%.0f\n", ((double)realSize)/((double)test->stride)); ownprintf("Size:\t\t\t%" PRIu64 "\n", realSize*test->bytes ); ownprintf("Size per thread:\t%" PRIu64 "\n", threads_data[0].data.size*test->bytes); ownprintf("Number of Flops:\t%" PRIu64 "\n", (threads_data[0].data.iter * realSize * test->flops)); ownprintf("MFlops/s:\t\t%.2f\n", 1.0E-06 * ((double) threads_data[0].data.iter * realSize * test->flops/ time)); ownprintf("Data volume (Byte):\t%llu\n", LLU_CAST (threads_data[0].data.iter * realSize * test->bytes)); ownprintf("MByte/s:\t\t%.2f\n", 1.0E-06 * ( (double) threads_data[0].data.iter * realSize * test->bytes/ time)); cycPerUp = ((double) maxCycles / (double) (threads_data[0].data.iter * realSize)); ownprintf("Cycles per update:\t%f\n", cycPerUp); switch ( test->type ) { case INT: case SINGLE: ownprintf("Cycles per cacheline:\t%f\n", (16.0 * cycPerUp)); break; case DOUBLE: ownprintf("Cycles per cacheline:\t%f\n", (8.0 * cycPerUp)); break; } ownprintf("Loads per update:\t%ld\n", test->loads ); ownprintf("Stores per update:\t%ld\n", test->stores ); if ((test->loads > 0) && (test->stores > 0)) { ownprintf("Load/store ratio:\t%.2f\n", ((double)test->loads)/((double)test->stores) ); } if ((test->instr_loop > 0) && (test->instr_const > 0)) { ownprintf("Instructions:\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->instr_loop*threads_data[0].data.iter + test->instr_const ); } if (test->uops > 0) { ownprintf("UOPs:\t\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->uops*threads_data[0].data.iter); } ownprintf(bdata(HLINE)); threads_destroy(numberOfWorkgroups, test->streams); allocator_finalize(); workgroups_destroy(&groups, numberOfWorkgroups, test->streams); #ifdef LIKWID_PERFMON if (getenv("LIKWID_FILEPATH") != NULL) { ownprintf("Writing Likwid Marker API results to 
file %s\n", getenv("LIKWID_FILEPATH")); } LIKWID_MARKER_CLOSE; #endif bdestroy(HLINE); return EXIT_SUCCESS; }
/*
 * Lua binding: returns one table with two 1-based arrays:
 *   "Counters" - per counter: Name, Options, Type, TypeName, Index
 *   "Events"   - per event:   Name, ID, UMask, Limit, Options
 *
 * "Options" is a '|'-separated list of the option names whose bit is set in
 * the entry's optionMask; it is the empty string when no bit is set.
 *
 * The topology module is initialized lazily and perfmon_init_maps() is called
 * before the counter and event maps are read.
 */
static int lua_likwid_getEventsAndCounters(lua_State* L)
{
    int i;
    char optString[1024];
    size_t optStringIndex = 0;

    if (topology_isInitialized == 0)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
    }
    if ((topology_isInitialized) && (cpuinfo == NULL))
    {
        cpuinfo = get_cpuInfo();
    }
    perfmon_init_maps();

    lua_newtable(L);

    lua_pushstring(L,"Counters");
    lua_newtable(L);
    for (i=1; i<=perfmon_numCounters; i++)
    {
        optStringIndex = 0;
        optString[0] = '\0';
        lua_pushunsigned(L,i);
        lua_newtable(L);
        lua_pushstring(L,"Name");
        lua_pushstring(L,counter_map[i-1].key);
        lua_settable(L,-3);
        lua_pushstring(L,"Options");
        for (int j=1; j<NUM_EVENT_OPTIONS; j++)
        {
            if (counter_map[i-1].optionMask & REG_TYPE_MASK(j))
            {
                size_t room = sizeof(optString) - optStringIndex;
                int written = snprintf(&(optString[optStringIndex]), room,
                                       "%s|", eventOptionTypeName[j]);
                if ((written < 0) || ((size_t)written >= room))
                {
                    /* Buffer exhausted: drop the partial name and stop. */
                    optString[optStringIndex] = '\0';
                    break;
                }
                optStringIndex += (size_t)written;
            }
        }
        /* Strip the trailing '|'. Guarded so that an entry without any
         * option no longer writes to optString[-1]. */
        if (optStringIndex > 0)
        {
            optString[optStringIndex-1] = '\0';
        }
        lua_pushstring(L,optString);
        lua_settable(L,-3);
        lua_pushstring(L,"Type");
        lua_pushunsigned(L, counter_map[i-1].type);
        lua_settable(L,-3);
        lua_pushstring(L,"TypeName");
        lua_pushstring(L, RegisterTypeNames[counter_map[i-1].type]);
        lua_settable(L,-3);
        lua_pushstring(L,"Index");
        lua_pushunsigned(L,counter_map[i-1].index);
        lua_settable(L,-3);
        lua_settable(L,-3);     /* Counters[i] = entry */
    }
    lua_settable(L,-3);         /* result.Counters = Counters */

    lua_pushstring(L,"Events");
    lua_newtable(L);
    for (i=1; i<=perfmon_numArchEvents; i++)
    {
        optStringIndex = 0;
        optString[0] = '\0';
        lua_pushunsigned(L,i);
        lua_newtable(L);
        lua_pushstring(L,"Name");
        lua_pushstring(L,eventHash[i-1].name);
        lua_settable(L,-3);
        lua_pushstring(L,"ID");
        lua_pushunsigned(L,eventHash[i-1].eventId);
        lua_settable(L,-3);
        lua_pushstring(L,"UMask");
        lua_pushunsigned(L,eventHash[i-1].umask);
        lua_settable(L,-3);
        lua_pushstring(L,"Limit");
        lua_pushstring(L,eventHash[i-1].limit);
        lua_settable(L,-3);
        lua_pushstring(L,"Options");
        for (int j=1; j<NUM_EVENT_OPTIONS; j++)
        {
            if (eventHash[i-1].optionMask & REG_TYPE_MASK(j))
            {
                size_t room = sizeof(optString) - optStringIndex;
                int written = snprintf(&(optString[optStringIndex]), room,
                                       "%s|", eventOptionTypeName[j]);
                if ((written < 0) || ((size_t)written >= room))
                {
                    optString[optStringIndex] = '\0';
                    break;
                }
                optStringIndex += (size_t)written;
            }
        }
        if (optStringIndex > 0)
        {
            optString[optStringIndex-1] = '\0';
        }
        lua_pushstring(L,optString);
        lua_settable(L,-3);
        lua_settable(L,-3);     /* Events[i] = entry */
    }
    lua_settable(L,-3);         /* result.Events = Events */

    return 1;
}
/*
 * Lua binding: returns one table describing the NUMA topology.
 *
 * Result layout: "numberOfNodes" plus a 1-based "nodes" array. Each node has
 * id, totalMemory, freeMemory, numberOfProcessors, numberOfDistances, a
 * 1-based "processors" array and a 1-based "distances" array whose entries
 * are single-pair tables { [j] = distance } with a 0-based key j.
 *
 * If the NUMA module has not been initialized yet and numa_init() fails, a
 * table with numberOfNodes = 0 and an empty "nodes" table is returned.
 * A "processorsCompact" sub-table was commented out in the original code and
 * is not exported.
 */
static int lua_likwid_getNumaInfo(lua_State* L)
{
    uint32_t node;
    uint32_t k;

    /* Topology: after this block the flag is always set, so only the cached
     * pointers need to be re-checked. */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if (cpuinfo == NULL)
    {
        cpuinfo = get_cpuInfo();
    }
    if (cputopo == NULL)
    {
        cputopo = get_cpuTopology();
    }

    /* NUMA: bail out with an empty description when initialization fails. */
    if (!numa_isInitialized)
    {
        if (numa_init() != 0)
        {
            lua_newtable(L);
            lua_pushstring(L,"numberOfNodes");
            lua_pushunsigned(L,0);
            lua_settable(L,-3);
            lua_pushstring(L,"nodes");
            lua_newtable(L);
            lua_settable(L,-3);
            return 1;
        }
        numa_isInitialized = 1;
        numainfo = get_numaTopology();
    }
    if (numainfo == NULL)
    {
        numainfo = get_numaTopology();
    }

    /* Affinity domains are initialized here as well, as in the original. */
    if (!affinity_isInitialized)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    if (affinity == NULL)
    {
        affinity = get_affinityDomains();
    }

    lua_newtable(L);

    lua_pushstring(L,"numberOfNodes");
    lua_pushunsigned(L,numainfo->numberOfNodes);
    lua_settable(L,-3);

    lua_pushstring(L,"nodes");
    lua_newtable(L);
    for (node = 0; node < numainfo->numberOfNodes; node++)
    {
        lua_pushinteger(L, node+1);
        lua_newtable(L);

        lua_pushstring(L,"id");
        lua_pushunsigned(L,numainfo->nodes[node].id);
        lua_settable(L,-3);

        lua_pushstring(L,"totalMemory");
        lua_pushunsigned(L,numainfo->nodes[node].totalMemory);
        lua_settable(L,-3);

        lua_pushstring(L,"freeMemory");
        lua_pushunsigned(L,numainfo->nodes[node].freeMemory);
        lua_settable(L,-3);

        lua_pushstring(L,"numberOfProcessors");
        lua_pushunsigned(L,numainfo->nodes[node].numberOfProcessors);
        lua_settable(L,-3);

        lua_pushstring(L,"numberOfDistances");
        lua_pushunsigned(L,numainfo->nodes[node].numberOfDistances);
        lua_settable(L,-3);

        /* processors[1..n] */
        lua_pushstring(L,"processors");
        lua_newtable(L);
        for (k = 0; k < numainfo->nodes[node].numberOfProcessors; k++)
        {
            lua_pushunsigned(L,k+1);
            lua_pushunsigned(L,numainfo->nodes[node].processors[k]);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);

        /* distances[1..m], each a one-element table keyed by the 0-based
         * index k. */
        lua_pushstring(L,"distances");
        lua_newtable(L);
        for (k = 0; k < numainfo->nodes[node].numberOfDistances; k++)
        {
            lua_pushinteger(L,k+1);
            lua_newtable(L);
            lua_pushinteger(L,k);
            lua_pushunsigned(L,numainfo->nodes[node].distances[k]);
            lua_settable(L,-3);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);

        lua_settable(L,-3);     /* nodes[node+1] = entry */
    }
    lua_settable(L,-3);         /* result.nodes = nodes */

    return 1;
}
/*
 * Lua binding: returns one table describing the power/energy capabilities,
 * or no value at all when power_init(0) reports no support on first use.
 *
 * Result layout: hasRAPL, baseFrequency, minFrequency, powerUnit, timeUnit,
 * a "turbo" table (numSteps and a 1-based "steps" array) and a "domains"
 * table keyed by power_names[i]. Each domain carries ID, energyUnit and the
 * support* booleans; tdp, minPower, maxPower and maxTimeWindow are only
 * present when the domain has the POWER_DOMAIN_SUPPORT_INFO flag.
 */
static int lua_likwid_getPowerInfo(lua_State* L)
{
    int idx;

    /* Topology: the flag is always set after this block. */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if (cpuinfo == NULL)
    {
        cpuinfo = get_cpuInfo();
    }
    if (cputopo == NULL)
    {
        cputopo = get_cpuTopology();
    }

    /* Power module: nothing is returned to Lua when it is unavailable. */
    if (!power_isInitialized)
    {
        power_hasRAPL = power_init(0);
        if (!power_hasRAPL)
        {
            return 0;
        }
        power_isInitialized = 1;
        power = get_powerInfo();
    }

    lua_newtable(L);

    lua_pushstring(L,"hasRAPL");
    lua_pushboolean(L,power_hasRAPL);
    lua_settable(L,-3);

    lua_pushstring(L,"baseFrequency");
    lua_pushnumber(L,power->baseFrequency);
    lua_settable(L,-3);

    lua_pushstring(L,"minFrequency");
    lua_pushnumber(L,power->minFrequency);
    lua_settable(L,-3);

    lua_pushstring(L,"powerUnit");
    lua_pushnumber(L,power->powerUnit);
    lua_settable(L,-3);

    lua_pushstring(L,"timeUnit");
    lua_pushnumber(L,power->timeUnit);
    lua_settable(L,-3);

    /* turbo = { numSteps = n, steps = { [1..n] } } */
    lua_pushstring(L,"turbo");
    lua_newtable(L);
    lua_pushstring(L,"numSteps");
    lua_pushunsigned(L,power->turbo.numSteps);
    lua_settable(L,-3);
    lua_pushstring(L,"steps");
    lua_newtable(L);
    for (idx = 0; idx < power->turbo.numSteps; idx++)
    {
        lua_pushunsigned(L,idx+1);
        lua_pushnumber(L,power->turbo.steps[idx]);
        lua_settable(L,-3);
    }
    lua_settable(L,-3);
    lua_settable(L,-3);

    /* domains = { [power_names[idx]] = { ... } } */
    lua_pushstring(L,"domains");
    lua_newtable(L);
    for (idx = 0; idx < NUM_POWER_DOMAINS; idx++)
    {
        int infoSupported = (power->domains[idx].supportFlags & POWER_DOMAIN_SUPPORT_INFO) ? 1 : 0;

        lua_pushstring(L,power_names[idx]);
        lua_newtable(L);

        lua_pushstring(L, "ID");
        lua_pushnumber(L, power->domains[idx].type);
        lua_settable(L,-3);

        lua_pushstring(L, "energyUnit");
        lua_pushnumber(L, power->domains[idx].energyUnit);
        lua_settable(L,-3);

        lua_pushstring(L,"supportStatus");
        lua_pushboolean(L, (power->domains[idx].supportFlags & POWER_DOMAIN_SUPPORT_STATUS) ? 1 : 0);
        lua_settable(L,-3);

        lua_pushstring(L,"supportPerf");
        lua_pushboolean(L, (power->domains[idx].supportFlags & POWER_DOMAIN_SUPPORT_PERF) ? 1 : 0);
        lua_settable(L,-3);

        lua_pushstring(L,"supportPolicy");
        lua_pushboolean(L, (power->domains[idx].supportFlags & POWER_DOMAIN_SUPPORT_POLICY) ? 1 : 0);
        lua_settable(L,-3);

        lua_pushstring(L,"supportLimit");
        lua_pushboolean(L, (power->domains[idx].supportFlags & POWER_DOMAIN_SUPPORT_LIMIT) ? 1 : 0);
        lua_settable(L,-3);

        lua_pushstring(L,"supportInfo");
        lua_pushboolean(L, infoSupported);
        lua_settable(L,-3);

        /* The limit details only exist for domains with info support. */
        if (infoSupported)
        {
            lua_pushstring(L,"tdp");
            lua_pushnumber(L, power->domains[idx].tdp);
            lua_settable(L,-3);

            lua_pushstring(L,"minPower");
            lua_pushnumber(L, power->domains[idx].minPower);
            lua_settable(L,-3);

            lua_pushstring(L,"maxPower");
            lua_pushnumber(L, power->domains[idx].maxPower);
            lua_settable(L,-3);

            lua_pushstring(L,"maxTimeWindow");
            lua_pushnumber(L, power->domains[idx].maxTimeWindow);
            lua_settable(L,-3);
        }

        lua_settable(L,-3);     /* domains[name] = entry */
    }
    lua_settable(L,-3);         /* result.domains = domains */

    return 1;
}
/*
 * Lua binding: returns one table describing the CPU topology.
 *
 * Result layout:
 *   scalars       numHWThreads, activeHWThreads, numSockets,
 *                 numCoresPerSocket, numThreadsPerCore, numCacheLevels
 *   threadPool    0-based array of { threadId, coreId, packageId, apicId,
 *                 inCpuSet }
 *   cacheLevels   1-based array of { level, associativity, sets, lineSize,
 *                 size, threads, inclusive, type (as string) }
 *   topologyTree  0-based array of sockets { ID, Childs }, where Childs is a
 *                 0-based array of cores { ID, Childs } and the innermost
 *                 Childs is a 0-based array of thread IDs
 *
 * The topology and NUMA modules are initialized lazily.
 */
static int lua_likwid_getCpuTopology(lua_State* L)
{
    int idx;
    TreeNode* socketNode;
    TreeNode* coreNode;
    TreeNode* threadNode;
    int socketCount = 0;
    int coreCount = 0;
    int threadCount = 0;

    /* Topology: the flag is always set after this block. */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cputopo = get_cpuTopology();
    }
    if (cputopo == NULL)
    {
        cputopo = get_cpuTopology();
    }

    /* NUMA: the flag stays 0 when numa_init() reports a failure. */
    if (!numa_isInitialized && numa_init() == 0)
    {
        numa_isInitialized = 1;
        numainfo = get_numaTopology();
    }
    if (numa_isInitialized && numainfo == NULL)
    {
        numainfo = get_numaTopology();
    }

    lua_newtable(L);

    lua_pushstring(L,"numHWThreads");
    lua_pushunsigned(L,cputopo->numHWThreads);
    lua_settable(L,-3);

    lua_pushstring(L,"activeHWThreads");
    lua_pushunsigned(L,cputopo->activeHWThreads);
    lua_settable(L,-3);

    lua_pushstring(L,"numSockets");
    lua_pushunsigned(L,cputopo->numSockets);
    lua_settable(L,-3);

    lua_pushstring(L,"numCoresPerSocket");
    lua_pushunsigned(L,cputopo->numCoresPerSocket);
    lua_settable(L,-3);

    lua_pushstring(L,"numThreadsPerCore");
    lua_pushunsigned(L,cputopo->numThreadsPerCore);
    lua_settable(L,-3);

    lua_pushstring(L,"numCacheLevels");
    lua_pushinteger(L,cputopo->numCacheLevels);
    lua_settable(L,-3);

    /* threadPool: keyed 0..numHWThreads-1 */
    lua_pushstring(L,"threadPool");
    lua_newtable(L);
    for (idx = 0; idx < cputopo->numHWThreads; idx++)
    {
        lua_pushnumber(L,idx);
        lua_newtable(L);

        lua_pushstring(L,"threadId");
        lua_pushunsigned(L,cputopo->threadPool[idx].threadId);
        lua_settable(L,-3);

        lua_pushstring(L,"coreId");
        lua_pushunsigned(L,cputopo->threadPool[idx].coreId);
        lua_settable(L,-3);

        lua_pushstring(L,"packageId");
        lua_pushunsigned(L,cputopo->threadPool[idx].packageId);
        lua_settable(L,-3);

        lua_pushstring(L,"apicId");
        lua_pushunsigned(L,cputopo->threadPool[idx].apicId);
        lua_settable(L,-3);

        lua_pushstring(L,"inCpuSet");
        lua_pushunsigned(L,cputopo->threadPool[idx].inCpuSet);
        lua_settable(L,-3);

        lua_settable(L,-3);
    }
    lua_settable(L,-3);

    /* cacheLevels: keyed 1..numCacheLevels */
    lua_pushstring(L,"cacheLevels");
    lua_newtable(L);
    for (idx = 0; idx < cputopo->numCacheLevels; idx++)
    {
        const char* typeName;

        lua_pushnumber(L,idx+1);
        lua_newtable(L);

        lua_pushstring(L,"level");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].level);
        lua_settable(L,-3);

        lua_pushstring(L,"associativity");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].associativity);
        lua_settable(L,-3);

        lua_pushstring(L,"sets");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].sets);
        lua_settable(L,-3);

        lua_pushstring(L,"lineSize");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].lineSize);
        lua_settable(L,-3);

        lua_pushstring(L,"size");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].size);
        lua_settable(L,-3);

        lua_pushstring(L,"threads");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].threads);
        lua_settable(L,-3);

        lua_pushstring(L,"inclusive");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].inclusive);
        lua_settable(L,-3);

        /* Map the cache type to its name; unknown values become NOCACHE. */
        switch (cputopo->cacheLevels[idx].type)
        {
            case DATACACHE:
                typeName = "DATACACHE";
                break;
            case INSTRUCTIONCACHE:
                typeName = "INSTRUCTIONCACHE";
                break;
            case UNIFIEDCACHE:
                typeName = "UNIFIEDCACHE";
                break;
            case ITLB:
                typeName = "ITLB";
                break;
            case DTLB:
                typeName = "DTLB";
                break;
            case NOCACHE:
            default:
                typeName = "NOCACHE";
                break;
        }
        lua_pushstring(L,"type");
        lua_pushstring(L,typeName);
        lua_settable(L,-3);

        lua_settable(L,-3);
    }
    lua_settable(L,-3);

    /* topologyTree: sockets -> cores -> threads, all keyed from 0 */
    lua_pushstring(L,"topologyTree");
    lua_newtable(L);
    for (socketNode = tree_getChildNode(cputopo->topologyTree);
         socketNode != NULL;
         socketNode = tree_getNextNode(socketNode))
    {
        lua_pushinteger(L, socketCount);
        lua_newtable(L);

        lua_pushstring(L, "ID");
        lua_pushunsigned(L,socketNode->id);
        lua_settable(L, -3);

        lua_pushstring(L, "Childs");
        lua_newtable(L);
        coreCount = 0;
        for (coreNode = tree_getChildNode(socketNode);
             coreNode != NULL;
             coreNode = tree_getNextNode(coreNode))
        {
            lua_pushinteger(L, coreCount);
            lua_newtable(L);

            lua_pushstring(L, "ID");
            lua_pushunsigned(L,coreNode->id);
            lua_settable(L,-3);

            lua_pushstring(L, "Childs");
            lua_newtable(L);
            threadCount = 0;
            for (threadNode = tree_getChildNode(coreNode);
                 threadNode != NULL;
                 threadNode = tree_getNextNode(threadNode))
            {
                lua_pushunsigned(L,threadCount);
                lua_pushunsigned(L,threadNode->id);
                lua_settable(L,-3);
                threadCount++;
            }
            lua_settable(L,-3);     /* core.Childs = threads */

            lua_settable(L,-3);     /* socket.Childs[coreCount] = core */
            coreCount++;
        }
        lua_settable(L,-3);         /* socket.Childs = cores */

        lua_settable(L,-3);         /* topologyTree[socketCount] = socket */
        socketCount++;
    }
    lua_settable(L,-3);             /* result.topologyTree = tree */

    return 1;
}
int main(int argc, char* argv[]) { int i, j; int err; int* cpus; int gid; double result = 0.0; char estr[] = "INSTR_RETIRED_ANY:FIXC0,CPU_CLK_UNHALTED_CORE:FIXC1,CPU_CLK_UNHALTED_REF:FIXC2,TEMP_CORE:TMP0"; // Load the topology module and print some values. err = topology_init(); if (err < 0) { printf("Failed to initialize LIKWID's topology module\n"); return 1; } // CpuInfo_t contains global information like name, CPU family, ... CpuInfo_t info = get_cpuInfo(); // CpuTopology_t contains information about the topology of the CPUs. CpuTopology_t topo = get_cpuTopology(); // Create affinity domains. Commonly only needed when reading Uncore counters //affinity_init(); printf("Likwid example on a %s with %d CPUs\n", info->name, topo->numHWThreads); cpus = (int*)malloc(topo->numHWThreads * sizeof(int)); if (!cpus) return 1; for (i=0;i<topo->numHWThreads;i++) { cpus[i] = topo->threadPool[i].apicId; } // Must be called before perfmon_init() but only if you want to use another // access mode as the pre-configured one. For direct access (0) you have to // be root. //accessClient_setaccessmode(0); // Initialize the perfmon module. err = perfmon_init(topo->numHWThreads, cpus); if (err < 0) { printf("Failed to initialize LIKWID's performance monitoring module\n"); topology_finalize(); return 1; } // Add eventset string to the perfmon module. gid = perfmon_addEventSet(estr); if (gid < 0) { printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr); perfmon_finalize(); topology_finalize(); return 1; } // Setup the eventset identified by group ID (gid). err = perfmon_setupCounters(gid); if (err < 0) { printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid); perfmon_finalize(); topology_finalize(); return 1; } // Start all counters in the previously set up event set. 
err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } // Perform something sleep(2); // Stop all counters in the previously started event set. err = perfmon_stopCounters(); if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } // Print the result of every thread/CPU for all events in estr. char* ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < topo->numHWThreads; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } // Uninitialize the perfmon module. perfmon_finalize(); // Uninitialize the topology module. topology_finalize(); return 0; }
int cpustr_to_cpulist(char* cpustring, int* cpulist, int length) { int insert = 0; int len = 0; int ret = 0; bstring bcpustr = bfromcstr(cpustring); struct bstrList* strlist = bstrListCreate(); bstring scattercheck = bformat("scatter"); topology_init(); CpuTopology_t cpuid_topology = get_cpuTopology(); strlist = bsplit(bcpustr, '@'); int* tmpList = (int*)malloc(length * sizeof(int)); if (tmpList == NULL) { bstrListDestroy(strlist); bdestroy(scattercheck); bdestroy(bcpustr); return -ENOMEM; } for (int i=0; i< strlist->qty; i++) { if (binstr(strlist->entry[i], 0, scattercheck) != BSTR_ERR) { ret = cpustr_to_cpulist_scatter(strlist->entry[i], tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); } else if (bstrchrp(strlist->entry[i], 'E', 0) == 0) { ret = cpustr_to_cpulist_expression(strlist->entry[i], tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); } else if (bstrchrp(strlist->entry[i], 'L', 0) == 0) { ret = cpustr_to_cpulist_logical(strlist->entry[i], tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); } else if (cpuid_topology->activeHWThreads < cpuid_topology->numHWThreads) { fprintf(stdout, "INFO: You are running LIKWID in a cpuset with %d CPUs, only logical numbering allowed", cpuid_topology->activeHWThreads); if (((bstrchrp(strlist->entry[i], 'N', 0) == 0) || (bstrchrp(strlist->entry[i], 'S', 0) == 0) || (bstrchrp(strlist->entry[i], 'C', 0) == 0) || (bstrchrp(strlist->entry[i], 'M', 0) == 0)) && (bstrchrp(strlist->entry[i], ':', 0) != BSTR_ERR)) { bstring newstr = bformat("L:"); bconcat(newstr, strlist->entry[i]); ret = cpustr_to_cpulist_logical(newstr, tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); bdestroy(newstr); } else { bstring newstr = bformat("L:N:"); bconcat(newstr, strlist->entry[i]); ret = cpustr_to_cpulist_logical(newstr, tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); bdestroy(newstr); } } if (((bstrchrp(strlist->entry[i], 
'N', 0) == 0) || (bstrchrp(strlist->entry[i], 'S', 0) == 0) || (bstrchrp(strlist->entry[i], 'C', 0) == 0) || (bstrchrp(strlist->entry[i], 'M', 0) == 0)) && (bstrchrp(strlist->entry[i], ':', 0) != BSTR_ERR)) { bstring newstr = bformat("L:"); bconcat(newstr, strlist->entry[i]); ret = cpustr_to_cpulist_logical(newstr, tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); bdestroy(newstr); } else { ret = cpustr_to_cpulist_physical(strlist->entry[i], tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); } } free(tmpList); bstrListDestroy(strlist); return insert; }
/*
 * Parses a physical CPU list: "[<domain>:]<id>[,<id>|<first>-<last>...]".
 *
 * Without a domain prefix the domain "N" is used. Only IDs for which
 * cpu_in_domain() succeeds are stored; the others are reported on stderr.
 *
 * bcpustr: expression to parse.
 * cpulist: output array with room for 'length' entries.
 * length:  capacity of cpulist; parsing stops once it is full.
 *
 * Returns the number of CPU IDs written (0 on any error).
 */
static int cpustr_to_cpulist_physical(bstring bcpustr, int* cpulist, int length)
{
    bstring bdomain = NULL;
    bstring blist = NULL;
    struct bstrList* strlist = NULL;
    int domainidx = -1;
    int insert = 0;

    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();

    /* Without capacity the "insert == length" stop below never triggers. */
    if ((affinity == NULL) || (cpulist == NULL) || (length <= 0))
    {
        return 0;
    }

    if (bstrchrp(bcpustr, ':', 0) != BSTR_ERR)
    {
        /* bsplit() allocates the list itself; no bstrListCreate() needed. */
        struct bstrList* parts = bsplit(bcpustr, ':');
        if ((parts == NULL) || (parts->qty < 2))
        {
            bstrListDestroy(parts);
            return 0;
        }
        bdomain = bstrcpy(parts->entry[0]);
        blist = bstrcpy(parts->entry[1]);
        bstrListDestroy(parts);
    }
    else
    {
        bdomain = bformat("N");
        blist = bstrcpy(bcpustr);
    }

    for (int i=0; i<affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(bdomain, affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
        goto physical_done;
    }

    strlist = bsplit(blist, ',');
    if (strlist == NULL)
    {
        goto physical_done;
    }

    for (int i=0; i< strlist->qty; i++)
    {
        if (bstrchrp(strlist->entry[i], '-', 0) != BSTR_ERR)
        {
            /* Range "<first>-<last>", both ends inclusive. */
            struct bstrList* indexlist = bsplit(strlist->entry[i], '-');
            if ((indexlist == NULL) || (indexlist->qty < 2))
            {
                bstrListDestroy(indexlist);
                continue;
            }
            int first = atoi(bdata(indexlist->entry[0]));
            int last = atoi(bdata(indexlist->entry[1]));
            bstrListDestroy(indexlist);

            for (int j=first; j<=last; j++)
            {
                if (cpu_in_domain(domainidx, j))
                {
                    cpulist[insert] = j;
                    insert++;
                    if (insert == length)
                    {
                        goto physical_done;
                    }
                }
                else
                {
                    fprintf(stderr, "CPU %d not in domain %s\n", j, bdata(affinity->domains[domainidx].tag));
                }
            }
        }
        else
        {
            int cpu = atoi(bdata(strlist->entry[i]));
            if (cpu_in_domain(domainidx, cpu))
            {
                cpulist[insert] = cpu;
                insert++;
                if (insert == length)
                {
                    goto physical_done;
                }
            }
            else
            {
                fprintf(stderr, "CPU %d not in domain %s\n", cpu, bdata(affinity->domains[domainidx].tag));
            }
        }
    }

physical_done:
    /* bstrlib's destroy functions reject NULL, so this is safe on all paths. */
    bstrListDestroy(strlist);
    bdestroy(bdomain);
    bdestroy(blist);
    return insert;
}
/*
 * Translate a logical CPU expression "L:<domain>:<indexlist>" into CPU IDs.
 *
 * <indexlist> is a comma separated sequence of single indices and inclusive
 * ranges "<first>-<last>". The indices address the processor list of the
 * affinity domain whose tag equals <domain>, after that list has been passed
 * through cpulist_sort(). Indices outside the domain's processor list are
 * reported on stderr and skipped instead of reading past the buffer.
 *
 * At most `length` IDs are written to `cpulist`.
 *
 * Returns the number of IDs written, 0 for an invalid expression, an unknown
 * domain or an empty output buffer, and -ENOMEM if the temporary index table
 * cannot be allocated.
 */
static int cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
{
    bstring bdomain = NULL;
    bstring blist = NULL;
    struct bstrList* strlist = NULL;
    int* inlist = NULL;
    int domainidx = -1;
    int insert = 0;

    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    if (affinity == NULL)
    {
        fprintf(stderr, "Cannot initialize affinity groups\n");
        return 0;
    }
    if (bstrchrp(bcpustr, 'L', 0) != 0)
    {
        fprintf(stderr, "Not a valid CPU expression\n");
        return 0;
    }
    /* Nothing can be stored; bail out before the first write to cpulist. */
    if (cpulist == NULL || length <= 0)
    {
        return 0;
    }

    /* bsplit() allocates the list itself, no bstrListCreate() needed. */
    struct bstrList* parts = bsplit(bcpustr, ':');
    if (parts == NULL || parts->qty != 3)
    {
        fprintf(stderr, "ERROR: Invalid expression, should look like L:<domain>:<indexlist> or be in a cpuset\n");
        bstrListDestroy(parts);
        return 0;
    }
    bdomain = bstrcpy(parts->entry[1]);
    blist = bstrcpy(parts->entry[2]);
    bstrListDestroy(parts);
    if (bdomain == NULL || blist == NULL)
    {
        goto logical_done;
    }

    for (int i = 0; i < affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(bdomain, affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        /* Reported on stderr like the other cpustr_to_cpulist_* parsers. */
        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
        goto logical_done;
    }

    const int nproc = (int)affinity->domains[domainidx].numberOfProcessors;
    if (nproc <= 0)
    {
        /* Empty domain: no index can be valid. */
        goto logical_done;
    }
    inlist = malloc((size_t)nproc * sizeof *inlist);
    if (inlist == NULL)
    {
        insert = -ENOMEM;
        goto logical_done;
    }
    cpulist_sort(affinity->domains[domainidx].processorList, inlist, nproc);

    strlist = bsplit(blist, ',');
    if (strlist == NULL)
    {
        goto logical_done;
    }

    for (int i = 0; i < strlist->qty; i++)
    {
        if (bstrchrp(strlist->entry[i], '-', 0) != BSTR_ERR)
        {
            struct bstrList* indexlist = bsplit(strlist->entry[i], '-');
            if (indexlist == NULL || indexlist->qty < 2)
            {
                bstrListDestroy(indexlist);
                continue;
            }
            /* Parse the bounds once instead of on every loop iteration. */
            const int first = atoi(bdata(indexlist->entry[0]));
            const int last = atoi(bdata(indexlist->entry[1]));
            bstrListDestroy(indexlist);

            for (int j = first; j <= last; j++)
            {
                if (j < 0 || j >= nproc)
                {
                    fprintf(stderr, "CPU index %d out of range for domain %s\n",
                            j, bdata(affinity->domains[domainidx].tag));
                    continue;
                }
                cpulist[insert] = inlist[j];
                insert++;
                if (insert == length)
                {
                    goto logical_done;
                }
            }
        }
        else
        {
            const int idx = atoi(bdata(strlist->entry[i]));
            if (idx < 0 || idx >= nproc)
            {
                fprintf(stderr, "CPU index %d out of range for domain %s\n",
                        idx, bdata(affinity->domains[domainidx].tag));
                continue;
            }
            cpulist[insert] = inlist[idx];
            insert++;
            if (insert == length)
            {
                goto logical_done;
            }
        }
    }

logical_done:
    /* Single exit: releases the index table and every bstrlib object. */
    free(inlist);
    bstrListDestroy(strlist);
    bdestroy(bdomain);
    bdestroy(blist);
    return insert;
}
/*
 * Translate a CPU selection expression into a list of CPU IDs.
 *
 * Accepted forms:
 *   E:<domain>:<count>                     (chunk = 1, stride = 1)
 *   E:<domain>:<count>:<chunk>:<stride>
 *
 * Starting at offset 0 of the processor list of the affinity domain whose tag
 * equals <domain>, up to <chunk> consecutive entries are taken, then the
 * offset advances by <stride>; once the offset reaches the end of the
 * processor list it restarts at 0. Selection stops as soon as <count> IDs
 * were written or `cpulist` holds `length` entries.
 *
 * Returns the number of IDs written, or 0 for an invalid expression, an
 * unknown domain, unavailable affinity information or an empty output buffer.
 */
static int cpustr_to_cpulist_expression(bstring bcpustr, int* cpulist, int length)
{
    bstring bdomain = NULL;
    struct bstrList* strlist = NULL;
    int domainidx = -1;
    int count = 0;
    int stride = 0;
    int chunk = 0;
    int offset = 0;
    int insert = 0;

    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    if (affinity == NULL)
    {
        fprintf(stderr, "Cannot initialize affinity groups\n");
        return 0;
    }
    if (bstrchrp(bcpustr, 'E', 0) != 0)
    {
        fprintf(stderr, "Not a valid CPU expression\n");
        return 0;
    }
    /* Nothing can be stored; bail out before the first write to cpulist. */
    if (cpulist == NULL || length <= 0)
    {
        return 0;
    }

    /* bsplit() allocates the list itself, no bstrListCreate() needed. */
    strlist = bsplit(bcpustr, ':');
    if (strlist == NULL)
    {
        return 0;
    }
    if (strlist->qty == 3)
    {
        bdomain = bstrcpy(strlist->entry[1]);
        count = atoi(bdata(strlist->entry[2]));
        stride = 1;
        chunk = 1;
    }
    else if (strlist->qty == 5)
    {
        bdomain = bstrcpy(strlist->entry[1]);
        count = atoi(bdata(strlist->entry[2]));
        chunk = atoi(bdata(strlist->entry[3]));
        stride = atoi(bdata(strlist->entry[4]));
    }
    else
    {
        /* Any other field count would leave bdomain unset. */
        fprintf(stderr, "ERROR: Invalid expression, should look like E:<domain>:<count> or E:<domain>:<count>:<chunk>:<stride>\n");
        goto expression_done;
    }
    if (bdomain == NULL)
    {
        goto expression_done;
    }

    for (int i = 0; i < affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(bdomain, affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
        goto expression_done;
    }

    if (stride < 0)
    {
        /* A negative stride would move the offset in front of the list. */
        fprintf(stderr, "ERROR: Invalid expression, stride must not be negative\n");
        goto expression_done;
    }

    const int nproc = (int)affinity->domains[domainidx].numberOfProcessors;
    if (count <= 0 || chunk <= 0 || nproc <= 0)
    {
        /* None of these combinations can select a CPU. */
        goto expression_done;
    }

    for (int i = 0; i < count; i++)
    {
        for (int j = 0; j < chunk && offset + j < nproc; j++)
        {
            cpulist[insert] = affinity->domains[domainidx].processorList[offset + j];
            insert++;
            /* Stop inside the chunk so that no more than count IDs are emitted. */
            if (insert == length || insert == count)
            {
                goto expression_done;
            }
        }
        /* Same as "offset += stride; wrap to 0 at the end", written so the
         * addition cannot overflow for very large strides. */
        if (stride >= nproc - offset)
        {
            offset = 0;
        }
        else
        {
            offset += stride;
        }
    }

expression_done:
    /* Single exit: every bstrlib object created above is released here. */
    bstrListDestroy(strlist);
    bdestroy(bdomain);
    return insert;
}