/*
 * Lua binding: initialize the perfmon module for a list of CPUs.
 *
 * Lua arguments:
 *   1: number of CPUs, must be greater than 0
 *   2: table holding the CPU IDs at indices 1..n (must be on top of the stack)
 *
 * Topology and NUMA data are set up on first use and cached in the
 * file-level handles. A Lua error is raised if the second argument is not a
 * table or if perfmon_init() fails.
 *
 * Returns one value to Lua: 0. This is also pushed when perfmon was already
 * initialized by an earlier call, so the caller never receives its own
 * argument table back as the "result".
 */
static int lua_likwid_init(lua_State* L)
{
    int ret = 0;
    int nrThreads = luaL_checknumber(L,1);
    luaL_argcheck(L, nrThreads > 0, 1, "CPU count must be greater than 0");
    /* NOTE(review): stack array sized by a caller-supplied count; a very
     * large count could exhaust the stack. */
    int cpus[nrThreads];

    if (!lua_istable(L, -1))
    {
        lua_pushstring(L,"No table given as second argument");
        lua_error(L); /* does not return */
    }
    for (int idx = 1; idx <= nrThreads; idx++)
    {
        lua_rawgeti(L,-1,idx);
        cpus[idx-1] = lua_tounsigned(L,-1);
        lua_pop(L,1);
    }

    if (topology_isInitialized == 0)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if ((topology_isInitialized) && (cpuinfo == NULL))
    {
        cpuinfo = get_cpuInfo();
    }
    if ((topology_isInitialized) && (cputopo == NULL))
    {
        cputopo = get_cpuTopology();
    }

    if (numa_isInitialized == 0)
    {
        /* Same convention as the other bindings in this file: only mark
         * NUMA as initialized when numa_init() reports success (0). */
        if (numa_init() == 0)
        {
            numa_isInitialized = 1;
            numainfo = get_numaTopology();
        }
    }
    if ((numa_isInitialized) && (numainfo == NULL))
    {
        numainfo = get_numaTopology();
    }

    if (perfmon_isInitialized == 0)
    {
        ret = perfmon_init(nrThreads, &(cpus[0]));
        if (ret != 0)
        {
            lua_pushstring(L,"Cannot initialize likwid perfmon");
            lua_error(L); /* does not return */
        }
        perfmon_isInitialized = 1;
        timer_isInitialized = 1;
    }

    /* Always push the status so that exactly one well-defined value is
     * returned, no matter whether perfmon was initialized here or before. */
    lua_pushinteger(L,ret);
    return 1;
}
int main(int argc, char* argv[]) { int i; int* cpus; int gid; double result = 0.0; // Load the topology module and print some values. topology_init(); // CpuInfo_t contains global information like name, CPU family, ... CpuInfo_t info = get_cpuInfo(); // CpuTopology_t contains information about the topology of the CPUs. CpuTopology_t topo = get_cpuTopology(); printf("Likwid example on a %s with %d CPUs\n", info->name, topo->numHWThreads); cpus = malloc(topo->numHWThreads * sizeof(int)); if (!cpus) return 1; for (i=0;i<topo->numHWThreads;i++) { cpus[i] = topo->threadPool[i].apicId; } // Must be called before perfmon_init() but only if you want to use another // access mode as the pre-configured one. For direct access (0) you have to // be root. //accessClient_setaccessmode(0); // Initialize the perfmon module. perfmon_init(topo->numHWThreads, cpus); // Add eventset string to the perfmon module. gid = perfmon_addEventSet(EVENTSET); // Setup the eventset identified by group ID (gid). perfmon_setupCounters(gid); // Start all counters in the previously set up event set. perfmon_startCounters(); // Perform something sleep(2); // Stop all counters in the previously started event set. perfmon_stopCounters(); // Print the result of every thread/CPU. for (i = 0;i < topo->numHWThreads; i++) { result = perfmon_getResult(gid, 0, i); printf("Measurement result for event set %s at CPU %d: %f\n", EVENTSET, cpus[i], result); } // Uninitialize the perfmon module. perfmon_finalize(); // Uninitialize the topology module. topology_finalize(); return 0; }
/*
 * Resolve a scatter expression ("<domaintype>:scatter") to a CPU list.
 *
 * Every affinity domain whose tag contains the first character of the
 * expression takes part. The CPUs of those domains are handed out
 * round-robin: entry 0 of every domain's sorted list, then entry 1 of every
 * domain, and so on.
 *
 * bcpustr  expression to resolve
 * cpulist  output buffer with room for `length` entries
 * length   capacity of cpulist
 *
 * Returns the number of CPUs written (at most `length`), 0 if the
 * expression contains no ':', no domain matches or length is not positive,
 * and -ENOMEM if an allocation fails.
 */
static int cpustr_to_cpulist_scatter(bstring bcpustr, int* cpulist, int length)
{
    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    int insert = 0;
    int suitidx = 0;
    int maxProcs = 0;
    int* suitable = NULL;
    int* sortedList = NULL;
    char* cpustring = NULL;

    if (length <= 0 || bstrchrp(bcpustr, ':', 0) == BSTR_ERR)
    {
        return 0;
    }

    cpustring = bstr2cstr(bcpustr, '\0');
    if (!cpustring)
    {
        return -ENOMEM;
    }

    suitable = malloc(affinity->numberOfAffinityDomains * sizeof(int));
    if (!suitable)
    {
        insert = -ENOMEM;
        goto scatter_done;
    }

    /* Collect the matching domains and remember the largest one so that
     * the scratch buffer fits the sorted list of any of them. */
    for (int i = 0; i < affinity->numberOfAffinityDomains; i++)
    {
        if (bstrchrp(affinity->domains[i].tag, cpustring[0], 0) != BSTR_ERR)
        {
            suitable[suitidx] = i;
            suitidx++;
            if ((int)affinity->domains[i].numberOfProcessors > maxProcs)
            {
                maxProcs = (int)affinity->domains[i].numberOfProcessors;
            }
        }
    }
    if (suitidx == 0 || maxProcs <= 0)
    {
        /* Without this guard suitable[0] would be read uninitialized. */
        fprintf(stderr, "No affinity domain matches %s\n", cpustring);
        goto scatter_done;
    }

    sortedList = malloc(maxProcs * sizeof(int));
    if (!sortedList)
    {
        insert = -ENOMEM;
        goto scatter_done;
    }

    for (int off = 0; off < maxProcs; off++)
    {
        for (int i = 0; i < suitidx; i++)
        {
            int sorted = cpulist_sort(affinity->domains[suitable[i]].processorList,
                                      sortedList,
                                      affinity->domains[suitable[i]].numberOfProcessors);
            /* Skip domains that have no entry at this position; only the
             * first `sorted` entries of the scratch buffer are valid. */
            if (off >= sorted)
            {
                continue;
            }
            cpulist[insert] = sortedList[off];
            insert++;
            if (insert == length)
            {
                goto scatter_done;
            }
        }
    }

scatter_done:
    bcstrfree(cpustring);
    free(sortedList);
    free(suitable);
    return insert;
}
int test_affinityinit() { int i = 0; topology_init(); CpuTopology_t cputopo = get_cpuTopology(); numa_init(); affinity_init(); AffinityDomains_t doms = get_affinityDomains(); if (doms == NULL) goto fail; if (doms->numberOfSocketDomains != cputopo->numSockets) goto fail; if (doms->numberOfNumaDomains == 0) goto fail; if (doms->numberOfProcessorsPerSocket == 0) goto fail; if (doms->numberOfAffinityDomains == 0) goto fail; if (doms->numberOfCacheDomains == 0) goto fail; if (doms->numberOfCoresPerCache == 0) goto fail; if (doms->numberOfProcessorsPerCache == 0) goto fail; if (doms->numberOfProcessorsPerCache < doms->numberOfCoresPerCache) goto fail; if (doms->domains == NULL) goto fail; for (i = 0; i < doms->numberOfAffinityDomains; i++) { if (doms->domains[i].numberOfProcessors == 0) goto fail; if (doms->domains[i].numberOfCores == 0) goto fail; if (doms->domains[i].numberOfProcessors < doms->domains[i].numberOfCores) goto fail; if (doms->domains[i].processorList == NULL) goto fail; } affinity_finalize(); topology_finalize(); return 1; fail: affinity_finalize(); topology_finalize(); return 0; }
/*
 * Reorder a CPU list by striding through it with the number of threads per
 * core: first all entries at offset 0 of each group of numThreadsPerCore
 * entries, then all entries at offset 1, and so on.
 *
 * incpus   input list with `length` entries
 * outcpus  output list, receives the reordered entries
 * length   number of entries in incpus
 *
 * Returns the number of entries written to outcpus, or -1 if length is not
 * positive. Entries beyond the last complete group are not copied.
 */
static int cpulist_sort(int* incpus, int* outcpus, int length)
{
    int written = 0;

    topology_init();
    CpuTopology_t cpuid_topology = get_cpuTopology();

    if (length <= 0)
    {
        return -1;
    }

    int smt = 0;
    while (smt < cpuid_topology->numThreadsPerCore)
    {
        int group = 0;
        while (group < length/cpuid_topology->numThreadsPerCore)
        {
            outcpus[written] = incpus[(group*cpuid_topology->numThreadsPerCore)+smt];
            written++;
            group++;
        }
        smt++;
    }
    return written;
}
/*
 * Translate a comma separated list of domain indices into a list of ints.
 *
 * Each item of bcpustr is prefixed with `prefix`; if an affinity domain with
 * that tag exists, the numeric value of the item is appended to `list`.
 * Items without a matching domain are reported on stderr and skipped.
 *
 * bcpustr  comma separated items
 * prefix   string prepended to every item to form the domain tag
 * list     output buffer with room for `length` entries
 * length   capacity of list
 *
 * Returns the number of entries written (at most `length`); 0 if length is
 * not positive or the input cannot be split.
 */
static int cpuexpr_to_list(bstring bcpustr, bstring prefix, int* list, int length)
{
    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    int insert = 0;

    if (length <= 0)
    {
        return 0;
    }

    /* bsplit() allocates the list itself; no bstrListCreate() needed. */
    struct bstrList* strlist = bsplit(bcpustr, ',');
    if (strlist == NULL)
    {
        return 0;
    }

    for (int i = 0; i < strlist->qty && insert < length; i++)
    {
        int found = 0;
        bstring newstr = bstrcpy(prefix);
        if (newstr == NULL)
        {
            break;
        }
        bconcat(newstr, strlist->entry[i]);
        for (int j = 0; j < affinity->numberOfAffinityDomains; j++)
        {
            if (bstrcmp(affinity->domains[j].tag, newstr) == 0)
            {
                list[insert] = atoi(bdata(strlist->entry[i]));
                insert++;
                found = 1;
                break;
            }
        }
        if (!found)
        {
            fprintf(stderr,"Domain %s cannot be found\n", bdata(newstr));
        }
        /* Released on every path, including when the list just got full. */
        bdestroy(newstr);
    }

    bstrListDestroy(strlist);
    return insert;
}
/*
 * Lua binding: return the affinity domains as a Lua table.
 *
 * Topology, NUMA and affinity data are set up on first use and cached in the
 * file-level handles. Raises a Lua error if no affinity data is available.
 *
 * The returned table holds the global counters and a "domains" array
 * (indices starting at 1). Every domain entry has the fields "tag",
 * "numberOfProcessors", "numberOfCores" and "processorList" (array starting
 * at 1).
 *
 * Returns 1 (one table on the Lua stack).
 */
static int lua_likwid_getAffinityInfo(lua_State* L)
{
    int dom, proc;

    /* --- lazy setup of the cached library handles --- */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if (topology_isInitialized && cpuinfo == NULL)
    {
        cpuinfo = get_cpuInfo();
    }
    if (topology_isInitialized && cputopo == NULL)
    {
        cputopo = get_cpuTopology();
    }

    if (numa_isInitialized == 0 && numa_init() == 0)
    {
        numa_isInitialized = 1;
        numainfo = get_numaTopology();
    }
    if (numa_isInitialized && numainfo == NULL)
    {
        numainfo = get_numaTopology();
    }

    if (!affinity_isInitialized)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    if (affinity_isInitialized && affinity == NULL)
    {
        affinity = get_affinityDomains();
    }
    if (affinity == NULL)
    {
        lua_pushstring(L,"Cannot initialize affinity groups");
        lua_error(L);
    }

    /* --- result table: global counters --- */
    lua_newtable(L);

    lua_pushstring(L,"numberOfAffinityDomains");
    lua_pushunsigned(L,affinity->numberOfAffinityDomains);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfSocketDomains");
    lua_pushunsigned(L,affinity->numberOfSocketDomains);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfNumaDomains");
    lua_pushunsigned(L,affinity->numberOfNumaDomains);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfProcessorsPerSocket");
    lua_pushunsigned(L,affinity->numberOfProcessorsPerSocket);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfCacheDomains");
    lua_pushunsigned(L,affinity->numberOfCacheDomains);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfCoresPerCache");
    lua_pushunsigned(L,affinity->numberOfCoresPerCache);
    lua_settable(L,-3);

    lua_pushstring(L,"numberOfProcessorsPerCache");
    lua_pushunsigned(L,affinity->numberOfProcessorsPerCache);
    lua_settable(L,-3);

    /* --- result table: "domains" array --- */
    lua_pushstring(L,"domains");
    lua_newtable(L);
    for (dom = 0; dom < affinity->numberOfAffinityDomains; dom++)
    {
        /* key and value table of this domain */
        lua_pushunsigned(L, dom+1);
        lua_newtable(L);

        lua_pushstring(L,"tag");
        lua_pushstring(L,bdata(affinity->domains[dom].tag));
        lua_settable(L,-3);

        lua_pushstring(L,"numberOfProcessors");
        lua_pushunsigned(L,affinity->domains[dom].numberOfProcessors);
        lua_settable(L,-3);

        lua_pushstring(L,"numberOfCores");
        lua_pushunsigned(L,affinity->domains[dom].numberOfCores);
        lua_settable(L,-3);

        lua_pushstring(L,"processorList");
        lua_newtable(L);
        for (proc = 0; proc < affinity->domains[dom].numberOfProcessors; proc++)
        {
            lua_pushunsigned(L,proc+1);
            lua_pushunsigned(L,affinity->domains[dom].processorList[proc]);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);   /* domain.processorList = list */

        lua_settable(L,-3);   /* domains[dom+1] = domain */
    }
    lua_settable(L,-3);       /* result.domains = domains */

    return 1;
}
/*
 * Lua binding: return the NUMA topology as a Lua table.
 *
 * Topology, NUMA and affinity data are set up on first use and cached in the
 * file-level handles. If numa_init() fails, a table with "numberOfNodes" = 0
 * and an empty "nodes" table is returned.
 *
 * The returned table holds "numberOfNodes" and a "nodes" array (indices
 * starting at 1). Every node has "id", "totalMemory", "freeMemory",
 * "numberOfProcessors", "numberOfDistances", a "processors" array and a
 * "distances" array whose entries are single-pair tables { [j] = distance }
 * with j starting at 0.
 *
 * Returns 1 (one table on the Lua stack).
 */
static int lua_likwid_getNumaInfo(lua_State* L)
{
    uint32_t node, k;

    /* --- lazy setup of the cached library handles --- */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if (topology_isInitialized && cpuinfo == NULL)
    {
        cpuinfo = get_cpuInfo();
    }
    if (topology_isInitialized && cputopo == NULL)
    {
        cputopo = get_cpuTopology();
    }

    if (!numa_isInitialized)
    {
        if (numa_init() != 0)
        {
            /* No NUMA information: hand back an empty description. */
            lua_newtable(L);
            lua_pushstring(L,"numberOfNodes");
            lua_pushunsigned(L,0);
            lua_settable(L,-3);
            lua_pushstring(L,"nodes");
            lua_newtable(L);
            lua_settable(L,-3);
            return 1;
        }
        numa_isInitialized = 1;
        numainfo = get_numaTopology();
    }
    if (numa_isInitialized && numainfo == NULL)
    {
        numainfo = get_numaTopology();
    }

    if (!affinity_isInitialized)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    if (affinity_isInitialized && affinity == NULL)
    {
        affinity = get_affinityDomains();
    }

    /* --- result table --- */
    lua_newtable(L);

    lua_pushstring(L,"numberOfNodes");
    lua_pushunsigned(L,numainfo->numberOfNodes);
    lua_settable(L,-3);

    lua_pushstring(L,"nodes");
    lua_newtable(L);
    for (node = 0; node < numainfo->numberOfNodes; node++)
    {
        /* key and value table of this node */
        lua_pushinteger(L, node+1);
        lua_newtable(L);

        lua_pushstring(L,"id");
        lua_pushunsigned(L,numainfo->nodes[node].id);
        lua_settable(L,-3);

        lua_pushstring(L,"totalMemory");
        lua_pushunsigned(L,numainfo->nodes[node].totalMemory);
        lua_settable(L,-3);

        lua_pushstring(L,"freeMemory");
        lua_pushunsigned(L,numainfo->nodes[node].freeMemory);
        lua_settable(L,-3);

        lua_pushstring(L,"numberOfProcessors");
        lua_pushunsigned(L,numainfo->nodes[node].numberOfProcessors);
        lua_settable(L,-3);

        lua_pushstring(L,"numberOfDistances");
        lua_pushunsigned(L,numainfo->nodes[node].numberOfDistances);
        lua_settable(L,-3);

        lua_pushstring(L,"processors");
        lua_newtable(L);
        for (k = 0; k < numainfo->nodes[node].numberOfProcessors; k++)
        {
            lua_pushunsigned(L,k+1);
            lua_pushunsigned(L,numainfo->nodes[node].processors[k]);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);   /* node.processors = list */

        lua_pushstring(L,"distances");
        lua_newtable(L);
        for (k = 0; k < numainfo->nodes[node].numberOfDistances; k++)
        {
            /* distances[k+1] = { [k] = distance to node k } */
            lua_pushinteger(L,k+1);
            lua_newtable(L);
            lua_pushinteger(L,k);
            lua_pushunsigned(L,numainfo->nodes[node].distances[k]);
            lua_settable(L,-3);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);   /* node.distances = list */

        lua_settable(L,-3);   /* nodes[node+1] = node */
    }
    lua_settable(L,-3);       /* result.nodes = nodes */

    return 1;
}
/*
 * Lua binding: return the CPU topology as a Lua table.
 *
 * Topology and NUMA data are set up on first use and cached in the
 * file-level handles.
 *
 * The returned table holds the global counters plus three sub-tables:
 *   "threadPool"    one entry per hardware thread, keys start at 0
 *   "cacheLevels"   one entry per cache level, keys start at 1
 *   "topologyTree"  sockets -> cores -> threads, keys start at 0; sockets
 *                   and cores carry "ID" and "Childs", the innermost table
 *                   maps the running thread number to the thread's id
 *
 * Returns 1 (one table on the Lua stack).
 */
static int lua_likwid_getCpuTopology(lua_State* L)
{
    int idx;
    TreeNode* socketNode;
    TreeNode* coreNode;
    TreeNode* threadNode;
    int socketCount = 0;
    int coreCount = 0;
    int threadCount = 0;

    /* --- lazy setup of the cached library handles --- */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cputopo = get_cpuTopology();
    }
    if (topology_isInitialized && cputopo == NULL)
    {
        cputopo = get_cpuTopology();
    }
    if (numa_isInitialized == 0 && numa_init() == 0)
    {
        numa_isInitialized = 1;
        numainfo = get_numaTopology();
    }
    if (numa_isInitialized && numainfo == NULL)
    {
        numainfo = get_numaTopology();
    }

    /* --- result table: global counters --- */
    lua_newtable(L);

    lua_pushstring(L,"numHWThreads");
    lua_pushunsigned(L,cputopo->numHWThreads);
    lua_settable(L,-3);

    lua_pushstring(L,"activeHWThreads");
    lua_pushunsigned(L,cputopo->activeHWThreads);
    lua_settable(L,-3);

    lua_pushstring(L,"numSockets");
    lua_pushunsigned(L,cputopo->numSockets);
    lua_settable(L,-3);

    lua_pushstring(L,"numCoresPerSocket");
    lua_pushunsigned(L,cputopo->numCoresPerSocket);
    lua_settable(L,-3);

    lua_pushstring(L,"numThreadsPerCore");
    lua_pushunsigned(L,cputopo->numThreadsPerCore);
    lua_settable(L,-3);

    lua_pushstring(L,"numCacheLevels");
    lua_pushinteger(L,cputopo->numCacheLevels);
    lua_settable(L,-3);

    /* --- "threadPool": keys start at 0 --- */
    lua_pushstring(L,"threadPool");
    lua_newtable(L);
    for (idx = 0; idx < cputopo->numHWThreads; idx++)
    {
        lua_pushnumber(L,idx);
        lua_newtable(L);

        lua_pushstring(L,"threadId");
        lua_pushunsigned(L,cputopo->threadPool[idx].threadId);
        lua_settable(L,-3);

        lua_pushstring(L,"coreId");
        lua_pushunsigned(L,cputopo->threadPool[idx].coreId);
        lua_settable(L,-3);

        lua_pushstring(L,"packageId");
        lua_pushunsigned(L,cputopo->threadPool[idx].packageId);
        lua_settable(L,-3);

        lua_pushstring(L,"apicId");
        lua_pushunsigned(L,cputopo->threadPool[idx].apicId);
        lua_settable(L,-3);

        lua_pushstring(L,"inCpuSet");
        lua_pushunsigned(L,cputopo->threadPool[idx].inCpuSet);
        lua_settable(L,-3);

        lua_settable(L,-3);   /* threadPool[idx] = thread */
    }
    lua_settable(L,-3);       /* result.threadPool = threadPool */

    /* --- "cacheLevels": keys start at 1 --- */
    lua_pushstring(L,"cacheLevels");
    lua_newtable(L);
    for (idx = 0; idx < cputopo->numCacheLevels; idx++)
    {
        const char* typeName;

        lua_pushnumber(L,idx+1);
        lua_newtable(L);

        lua_pushstring(L,"level");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].level);
        lua_settable(L,-3);

        lua_pushstring(L,"associativity");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].associativity);
        lua_settable(L,-3);

        lua_pushstring(L,"sets");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].sets);
        lua_settable(L,-3);

        lua_pushstring(L,"lineSize");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].lineSize);
        lua_settable(L,-3);

        lua_pushstring(L,"size");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].size);
        lua_settable(L,-3);

        lua_pushstring(L,"threads");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].threads);
        lua_settable(L,-3);

        lua_pushstring(L,"inclusive");
        lua_pushunsigned(L,cputopo->cacheLevels[idx].inclusive);
        lua_settable(L,-3);

        /* Cache type as its enumerator name; unknown values map to NOCACHE. */
        switch (cputopo->cacheLevels[idx].type)
        {
            case DATACACHE:
                typeName = "DATACACHE";
                break;
            case INSTRUCTIONCACHE:
                typeName = "INSTRUCTIONCACHE";
                break;
            case UNIFIEDCACHE:
                typeName = "UNIFIEDCACHE";
                break;
            case ITLB:
                typeName = "ITLB";
                break;
            case DTLB:
                typeName = "DTLB";
                break;
            case NOCACHE:
            default:
                typeName = "NOCACHE";
                break;
        }
        lua_pushstring(L,"type");
        lua_pushstring(L,typeName);
        lua_settable(L,-3);

        lua_settable(L,-3);   /* cacheLevels[idx+1] = level */
    }
    lua_settable(L,-3);       /* result.cacheLevels = cacheLevels */

    /* --- "topologyTree": sockets -> cores -> threads, keys start at 0 --- */
    lua_pushstring(L,"topologyTree");
    lua_newtable(L);
    for (socketNode = tree_getChildNode(cputopo->topologyTree);
         socketNode != NULL;
         socketNode = tree_getNextNode(socketNode))
    {
        lua_pushinteger(L, socketCount);
        lua_newtable(L);

        lua_pushstring(L, "ID");
        lua_pushunsigned(L,socketNode->id);
        lua_settable(L, -3);

        lua_pushstring(L, "Childs");
        lua_newtable(L);
        coreCount = 0;
        for (coreNode = tree_getChildNode(socketNode);
             coreNode != NULL;
             coreNode = tree_getNextNode(coreNode))
        {
            lua_pushinteger(L, coreCount);
            lua_newtable(L);

            lua_pushstring(L, "ID");
            lua_pushunsigned(L,coreNode->id);
            lua_settable(L,-3);

            lua_pushstring(L, "Childs");
            lua_newtable(L);
            threadCount = 0;
            for (threadNode = tree_getChildNode(coreNode);
                 threadNode != NULL;
                 threadNode = tree_getNextNode(threadNode))
            {
                lua_pushunsigned(L,threadCount);
                lua_pushunsigned(L,threadNode->id);
                lua_settable(L,-3);
                threadCount++;
            }
            lua_settable(L,-3);   /* core.Childs = threads */

            coreCount++;
            lua_settable(L,-3);   /* socket.Childs[coreCount-1] = core */
        }
        lua_settable(L,-3);       /* socket.Childs = cores */

        socketCount++;
        lua_settable(L,-3);       /* topologyTree[socketCount-1] = socket */
    }
    lua_settable(L,-3);           /* result.topologyTree = topologyTree */

    return 1;
}
/*
 * Lua binding: return the power (RAPL) information as a Lua table.
 *
 * Topology and power data are set up on first use and cached in the
 * file-level handles. If power_init(0) reports no RAPL support, nothing is
 * returned to Lua (0 results).
 *
 * The returned table holds "hasRAPL", the frequency and unit fields, a
 * "turbo" table ("numSteps" and a "steps" array starting at 1) and a
 * "domains" table keyed by power_names[] with the support flags of every
 * domain. "tdp", "minPower", "maxPower" and "maxTimeWindow" are only present
 * for domains with the POWER_DOMAIN_SUPPORT_INFO flag.
 *
 * Returns 1 (one table on the Lua stack) or 0.
 */
static int lua_likwid_getPowerInfo(lua_State* L)
{
    int idx;

    /* --- lazy setup of the cached library handles --- */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if (topology_isInitialized && cpuinfo == NULL)
    {
        cpuinfo = get_cpuInfo();
    }
    if (topology_isInitialized && cputopo == NULL)
    {
        cputopo = get_cpuTopology();
    }

    if (!power_isInitialized)
    {
        power_hasRAPL = power_init(0);
        if (!power_hasRAPL)
        {
            return 0;
        }
        power_isInitialized = 1;
        power = get_powerInfo();
    }

    /* --- result table: scalar fields --- */
    lua_newtable(L);

    lua_pushstring(L,"hasRAPL");
    lua_pushboolean(L,power_hasRAPL);
    lua_settable(L,-3);

    lua_pushstring(L,"baseFrequency");
    lua_pushnumber(L,power->baseFrequency);
    lua_settable(L,-3);

    lua_pushstring(L,"minFrequency");
    lua_pushnumber(L,power->minFrequency);
    lua_settable(L,-3);

    lua_pushstring(L,"powerUnit");
    lua_pushnumber(L,power->powerUnit);
    lua_settable(L,-3);

    lua_pushstring(L,"timeUnit");
    lua_pushnumber(L,power->timeUnit);
    lua_settable(L,-3);

    /* --- "turbo" --- */
    lua_pushstring(L,"turbo");
    lua_newtable(L);

    lua_pushstring(L,"numSteps");
    lua_pushunsigned(L,power->turbo.numSteps);
    lua_settable(L,-3);

    lua_pushstring(L,"steps");
    lua_newtable(L);
    for (idx = 0; idx < power->turbo.numSteps; idx++)
    {
        lua_pushunsigned(L,idx+1);
        lua_pushnumber(L,power->turbo.steps[idx]);
        lua_settable(L,-3);
    }
    lua_settable(L,-3);   /* turbo.steps = steps */
    lua_settable(L,-3);   /* result.turbo = turbo */

    /* --- "domains", keyed by domain name --- */
    lua_pushstring(L,"domains");
    lua_newtable(L);
    for (idx = 0; idx < NUM_POWER_DOMAINS; idx++)
    {
        int hasInfo = (power->domains[idx].supportFlags & POWER_DOMAIN_SUPPORT_INFO) ? 1 : 0;

        lua_pushstring(L,power_names[idx]);
        lua_newtable(L);

        lua_pushstring(L, "ID");
        lua_pushnumber(L, power->domains[idx].type);
        lua_settable(L,-3);

        lua_pushstring(L, "energyUnit");
        lua_pushnumber(L, power->domains[idx].energyUnit);
        lua_settable(L,-3);

        lua_pushstring(L,"supportStatus");
        lua_pushboolean(L, (power->domains[idx].supportFlags & POWER_DOMAIN_SUPPORT_STATUS) ? 1 : 0);
        lua_settable(L,-3);

        lua_pushstring(L,"supportPerf");
        lua_pushboolean(L, (power->domains[idx].supportFlags & POWER_DOMAIN_SUPPORT_PERF) ? 1 : 0);
        lua_settable(L,-3);

        lua_pushstring(L,"supportPolicy");
        lua_pushboolean(L, (power->domains[idx].supportFlags & POWER_DOMAIN_SUPPORT_POLICY) ? 1 : 0);
        lua_settable(L,-3);

        lua_pushstring(L,"supportLimit");
        lua_pushboolean(L, (power->domains[idx].supportFlags & POWER_DOMAIN_SUPPORT_LIMIT) ? 1 : 0);
        lua_settable(L,-3);

        lua_pushstring(L,"supportInfo");
        lua_pushboolean(L, hasInfo);
        lua_settable(L,-3);

        /* The power limits are only exported when the domain provides them. */
        if (hasInfo)
        {
            lua_pushstring(L,"tdp");
            lua_pushnumber(L, power->domains[idx].tdp);
            lua_settable(L,-3);

            lua_pushstring(L,"minPower");
            lua_pushnumber(L, power->domains[idx].minPower);
            lua_settable(L,-3);

            lua_pushstring(L,"maxPower");
            lua_pushnumber(L, power->domains[idx].maxPower);
            lua_settable(L,-3);

            lua_pushstring(L,"maxTimeWindow");
            lua_pushnumber(L, power->domains[idx].maxTimeWindow);
            lua_settable(L,-3);
        }

        lua_settable(L,-3);   /* domains[name] = domain */
    }
    lua_settable(L,-3);       /* result.domains = domains */

    return 1;
}
/*
 * Parse a workgroup string and fill `group`.
 *
 * Accepted forms (fields separated by ':'):
 *   <domain>:<size>                             all active HW threads
 *   <domain>:<size>:<nrThreads>
 *   <domain>:<size>:<nrThreads>:<chunk>:<stride>
 *
 * group  receives size, numberOfThreads and a malloc'ed processorIds array
 *        (owned by the caller on success)
 * str    workgroup string
 * type   data type used to convert <size>
 *
 * Returns a newly allocated copy of the domain name (the caller must
 * bdestroy() it), or NULL on error. On error no memory stays allocated and
 * group->processorIds is not left pointing to freed memory.
 */
bstring parse_workgroup(Workgroup* group, const_bstring str, DataType type)
{
    CpuTopology_t topo;
    struct bstrList* tokens;
    bstring cpustr = NULL;
    bstring domain = NULL;
    int numThreads = 0;

    tokens = bsplit(str,':');
    if (tokens == NULL)
    {
        fprintf(stderr, "Misformated workgroup string\n");
        return NULL;
    }

    /* Build the matching "E:" expression for cpustr_to_cpulist(). */
    if (tokens->qty == 2)
    {
        topo = get_cpuTopology();
        numThreads = topo->activeHWThreads;
        cpustr = bformat("E:%s:%d", bdata(tokens->entry[0]), numThreads );
    }
    else if (tokens->qty == 3)
    {
        cpustr = bformat("E:%s:%s", bdata(tokens->entry[0]), bdata(tokens->entry[2]));
        numThreads = str2int(bdata(tokens->entry[2]));
    }
    else if (tokens->qty == 5)
    {
        cpustr = bformat("E:%s:%s:%s:%s", bdata(tokens->entry[0]),
                                          bdata(tokens->entry[2]),
                                          bdata(tokens->entry[3]),
                                          bdata(tokens->entry[4]));
        numThreads = str2int(bdata(tokens->entry[2]));
    }
    else
    {
        fprintf(stderr, "Misformated workgroup string\n");
        goto cleanup;
    }

    if (numThreads < 0)
    {
        fprintf(stderr, "Cannot convert %s to integer\n", bdata(tokens->entry[2]));
        goto cleanup;
    }
    if (cpustr == NULL)
    {
        fprintf(stderr, "No more memory to allocate list of processors\n");
        goto cleanup;
    }

    group->size = bstr_to_doubleSize(tokens->entry[1], type);
    if (group->size == 0)
    {
        fprintf(stderr, "Stream size cannot be read, should look like <domain>:<size>\n");
        goto cleanup;
    }

    group->processorIds = (int*) malloc(numThreads * sizeof(int));
    if (group->processorIds == NULL)
    {
        fprintf(stderr, "No more memory to allocate list of processors\n");
        goto cleanup;
    }
    group->numberOfThreads = numThreads;

    if (cpustr_to_cpulist(bdata(cpustr),group->processorIds, numThreads) < 0 )
    {
        /* Do not hand a dangling pointer back to the caller. */
        free(group->processorIds);
        group->processorIds = NULL;
        group->numberOfThreads = 0;
        goto cleanup;
    }

    domain = bstrcpy(tokens->entry[0]);

cleanup:
    /* Single exit: cpustr and tokens are released on every path. */
    bdestroy(cpustr);
    bstrListDestroy(tokens);
    return domain;
}
int cpustr_to_cpulist(char* cpustring, int* cpulist, int length) { int insert = 0; int len = 0; int ret = 0; bstring bcpustr = bfromcstr(cpustring); struct bstrList* strlist = bstrListCreate(); bstring scattercheck = bformat("scatter"); topology_init(); CpuTopology_t cpuid_topology = get_cpuTopology(); strlist = bsplit(bcpustr, '@'); int* tmpList = (int*)malloc(length * sizeof(int)); if (tmpList == NULL) { bstrListDestroy(strlist); bdestroy(scattercheck); bdestroy(bcpustr); return -ENOMEM; } for (int i=0; i< strlist->qty; i++) { if (binstr(strlist->entry[i], 0, scattercheck) != BSTR_ERR) { ret = cpustr_to_cpulist_scatter(strlist->entry[i], tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); } else if (bstrchrp(strlist->entry[i], 'E', 0) == 0) { ret = cpustr_to_cpulist_expression(strlist->entry[i], tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); } else if (bstrchrp(strlist->entry[i], 'L', 0) == 0) { ret = cpustr_to_cpulist_logical(strlist->entry[i], tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); } else if (cpuid_topology->activeHWThreads < cpuid_topology->numHWThreads) { fprintf(stdout, "INFO: You are running LIKWID in a cpuset with %d CPUs, only logical numbering allowed\n", cpuid_topology->activeHWThreads); if (((bstrchrp(strlist->entry[i], 'N', 0) == 0) || (bstrchrp(strlist->entry[i], 'S', 0) == 0) || (bstrchrp(strlist->entry[i], 'C', 0) == 0) || (bstrchrp(strlist->entry[i], 'M', 0) == 0)) && (bstrchrp(strlist->entry[i], ':', 0) != BSTR_ERR)) { bstring newstr = bformat("L:"); bconcat(newstr, strlist->entry[i]); ret = cpustr_to_cpulist_logical(newstr, tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); bdestroy(newstr); } else { bstring newstr = bformat("L:N:"); bconcat(newstr, strlist->entry[i]); ret = cpustr_to_cpulist_logical(newstr, tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); bdestroy(newstr); } } else if 
(((bstrchrp(strlist->entry[i], 'N', 0) == 0) || (bstrchrp(strlist->entry[i], 'S', 0) == 0) || (bstrchrp(strlist->entry[i], 'C', 0) == 0) || (bstrchrp(strlist->entry[i], 'M', 0) == 0)) && (bstrchrp(strlist->entry[i], ':', 0) != BSTR_ERR)) { bstring newstr = bformat("L:"); bconcat(newstr, strlist->entry[i]); ret = cpustr_to_cpulist_logical(newstr, tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); bdestroy(newstr); } else { ret = cpustr_to_cpulist_physical(strlist->entry[i], tmpList, length); insert += cpulist_concat(cpulist, insert, tmpList, ret); } } free(tmpList); bstrListDestroy(strlist); return insert; }
/*
 * Resolve a physical CPU list ("[<domain>:]<list>") to CPU IDs.
 *
 * <list> is a comma separated sequence of CPU IDs and ranges "a-b"; a range
 * with a > b is walked downwards. Without a domain, domain "N" is used.
 * CPUs that are not part of the domain are reported on stderr and skipped.
 *
 * bcpustr  expression to resolve
 * cpulist  output buffer with room for `length` entries
 * length   capacity of cpulist
 *
 * Returns the number of CPUs written (at most `length`), 0 if the domain is
 * unknown or length is not positive, and -ENOMEM if a string operation
 * fails to allocate.
 */
static int cpustr_to_cpulist_physical(bstring bcpustr, int* cpulist, int length)
{
    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    bstring bdomain = NULL;
    bstring blist = NULL;
    struct bstrList* strlist = NULL;
    int domainidx = -1;
    int insert = 0;

    if (length <= 0)
    {
        return 0;
    }

    /* Split off the optional domain. bsplit() allocates the list itself. */
    if (bstrchrp(bcpustr, ':', 0) != BSTR_ERR)
    {
        struct bstrList* parts = bsplit(bcpustr, ':');
        if (parts == NULL || parts->qty < 2)
        {
            bstrListDestroy(parts);
            return -ENOMEM;
        }
        bdomain = bstrcpy(parts->entry[0]);
        blist = bstrcpy(parts->entry[1]);
        bstrListDestroy(parts);
    }
    else
    {
        bdomain = bformat("N");
        blist = bstrcpy(bcpustr);
    }
    if (bdomain == NULL || blist == NULL)
    {
        insert = -ENOMEM;
        goto physical_done;
    }

    for (int i = 0; i < affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(bdomain, affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
        goto physical_done;
    }

    strlist = bsplit(blist, ',');
    if (strlist == NULL)
    {
        insert = -ENOMEM;
        goto physical_done;
    }

    for (int i = 0; i < strlist->qty; i++)
    {
        int first;
        int last;

        if (bstrchrp(strlist->entry[i], '-', 0) != BSTR_ERR)
        {
            struct bstrList* indexlist = bsplit(strlist->entry[i], '-');
            if (indexlist == NULL || indexlist->qty < 2)
            {
                bstrListDestroy(indexlist);
                continue;
            }
            first = atoi(bdata(indexlist->entry[0]));
            last = atoi(bdata(indexlist->entry[1]));
            bstrListDestroy(indexlist);
        }
        else
        {
            /* A single CPU is the range cpu-cpu. */
            first = atoi(bdata(strlist->entry[i]));
            last = first;
        }

        /* Walk the inclusive range in the direction given by the user. */
        int step = (first <= last) ? 1 : -1;
        for (int cpu = first; ; cpu += step)
        {
            if (cpu_in_domain(domainidx, cpu))
            {
                cpulist[insert] = cpu;
                insert++;
                if (insert == length)
                {
                    goto physical_done;
                }
            }
            else
            {
                fprintf(stderr, "CPU %d not in domain %s\n", cpu, bdata(affinity->domains[domainidx].tag));
            }
            if (cpu == last)
            {
                break;
            }
        }
    }

physical_done:
    bstrListDestroy(strlist);
    bdestroy(bdomain);
    bdestroy(blist);
    return insert;
}
/*
 * Resolve a logical CPU expression ("L:<domain>:<indexlist>") to CPU IDs.
 *
 * <indexlist> is a comma separated sequence of indices and ranges "a-b"
 * into the domain's CPU list as reordered by cpulist_sort(); a range with
 * a > b is walked downwards. Indices outside of that list are reported on
 * stderr and skipped instead of being read from beyond the list.
 *
 * bcpustr  expression to resolve
 * cpulist  output buffer with room for `length` entries
 * length   capacity of cpulist
 *
 * Returns the number of CPUs written (at most `length`), 0 for an invalid
 * expression, an unknown domain or a non-positive length, and -ENOMEM if an
 * allocation fails.
 */
static int cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
{
    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    int domainidx = -1;
    int insert = 0;
    bstring bdomain = NULL;
    bstring blist = NULL;
    struct bstrList* strlist = NULL;
    int* inlist = NULL;

    if (bstrchrp(bcpustr, 'L', 0) != 0)
    {
        fprintf(stderr, "Not a valid CPU expression\n");
        return 0;
    }
    if (length <= 0)
    {
        return 0;
    }

    /* bsplit() allocates the list itself; no bstrListCreate() needed. */
    strlist = bsplit(bcpustr, ':');
    if (strlist == NULL)
    {
        return -ENOMEM;
    }
    if (strlist->qty != 3)
    {
        fprintf(stderr, "ERROR: Invalid expression, should look like L:<domain>:<indexlist> or be in a cpuset\n");
        bstrListDestroy(strlist);
        return 0;
    }
    bdomain = bstrcpy(strlist->entry[1]);
    blist = bstrcpy(strlist->entry[2]);
    bstrListDestroy(strlist);
    strlist = NULL;
    if (bdomain == NULL || blist == NULL)
    {
        insert = -ENOMEM;
        goto logical_done;
    }

    for (int i = 0; i < affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(bdomain, affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
        goto logical_done;
    }

    inlist = malloc(affinity->domains[domainidx].numberOfProcessors * sizeof(int));
    if (inlist == NULL)
    {
        insert = -ENOMEM;
        goto logical_done;
    }
    /* Only the first `avail` entries of inlist are valid afterwards. */
    int avail = cpulist_sort(affinity->domains[domainidx].processorList, inlist,
                             affinity->domains[domainidx].numberOfProcessors);

    strlist = bsplit(blist, ',');
    if (strlist == NULL)
    {
        insert = -ENOMEM;
        goto logical_done;
    }

    for (int i = 0; i < strlist->qty; i++)
    {
        int first;
        int last;

        if (bstrchrp(strlist->entry[i], '-', 0) != BSTR_ERR)
        {
            struct bstrList* indexlist = bsplit(strlist->entry[i], '-');
            if (indexlist == NULL || indexlist->qty < 2)
            {
                bstrListDestroy(indexlist);
                continue;
            }
            first = atoi(bdata(indexlist->entry[0]));
            last = atoi(bdata(indexlist->entry[1]));
            bstrListDestroy(indexlist);
        }
        else
        {
            /* A single index is the range idx-idx. */
            first = atoi(bdata(strlist->entry[i]));
            last = first;
        }

        /* Validate the user-supplied indices against the sorted list. */
        int lo = (first <= last) ? first : last;
        int hi = (first <= last) ? last : first;
        if (lo < 0 || hi >= avail)
        {
            fprintf(stderr, "Index %s exceeds the %d CPUs of domain %s\n",
                    bdata(strlist->entry[i]), (avail > 0 ? avail : 0), bdata(bdomain));
        }
        if (hi < 0 || lo >= avail)
        {
            /* Nothing of this item lies inside the list. */
            continue;
        }
        /* Clamp both ends so that only valid entries are read. */
        if (first < 0) first = 0;
        if (first >= avail) first = avail - 1;
        if (last < 0) last = 0;
        if (last >= avail) last = avail - 1;

        int step = (first <= last) ? 1 : -1;
        for (int idx = first; ; idx += step)
        {
            cpulist[insert] = inlist[idx];
            insert++;
            if (insert == length)
            {
                goto logical_done;
            }
            if (idx == last)
            {
                break;
            }
        }
    }

logical_done:
    free(inlist);
    bstrListDestroy(strlist);
    bdestroy(bdomain);
    bdestroy(blist);
    return insert;
}
/*
 * Resolve an expression ("E:<domain>:<count>[:<chunk>:<stride>]") to CPU
 * IDs.
 *
 * Starting at the beginning of the domain's processor list, `chunk`
 * consecutive CPUs are taken, then the start position advances by `stride`
 * (wrapping to 0 at the end of the list), until `count` CPUs are selected.
 * Without chunk and stride both default to 1.
 *
 * bcpustr  expression to resolve
 * cpulist  output buffer with room for `length` entries
 * length   capacity of cpulist
 *
 * Returns the number of CPUs written (at most min(count, length)), or 0 for
 * an invalid expression, an unknown domain or a non-positive length.
 */
static int cpustr_to_cpulist_expression(bstring bcpustr, int* cpulist, int length)
{
    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    bstring bdomain = NULL;
    struct bstrList* strlist = NULL;
    int domainidx = -1;
    int count = 0;
    int stride = 1;
    int chunk = 1;
    int insert = 0;

    if (bstrchrp(bcpustr, 'E', 0) != 0)
    {
        fprintf(stderr, "Not a valid CPU expression\n");
        return 0;
    }
    if (length <= 0)
    {
        return 0;
    }

    /* bsplit() allocates the list itself; no bstrListCreate() needed. */
    strlist = bsplit(bcpustr, ':');
    if (strlist == NULL)
    {
        return 0;
    }
    if (strlist->qty == 3)
    {
        bdomain = bstrcpy(strlist->entry[1]);
        count = atoi(bdata(strlist->entry[2]));
    }
    else if (strlist->qty == 5)
    {
        bdomain = bstrcpy(strlist->entry[1]);
        count = atoi(bdata(strlist->entry[2]));
        chunk = atoi(bdata(strlist->entry[3]));
        stride = atoi(bdata(strlist->entry[4]));
    }
    else
    {
        /* Any other field count would leave the domain undefined. */
        fprintf(stderr, "Not a valid CPU expression\n");
        goto expression_done;
    }
    if (bdomain == NULL)
    {
        goto expression_done;
    }

    for (int i = 0; i < affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(bdomain, affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
        goto expression_done;
    }
    if (stride < 0)
    {
        /* A negative stride would index before the processor list. */
        fprintf(stderr, "Not a valid CPU expression\n");
        goto expression_done;
    }

    int nprocs = (int)affinity->domains[domainidx].numberOfProcessors;
    int offset = 0;
    for (int i = 0; i < count; i++)
    {
        for (int j = 0; j < chunk && offset + j < nprocs; j++)
        {
            cpulist[insert] = affinity->domains[domainidx].processorList[offset + j];
            insert++;
            /* Stop at the buffer end and as soon as `count` CPUs are
             * selected, also in the middle of a chunk. */
            if (insert == length || insert == count)
            {
                goto expression_done;
            }
        }
        offset += stride;
        if (offset >= nprocs)
        {
            offset = 0;
        }
    }

expression_done:
    bdestroy(bdomain);
    bstrListDestroy(strlist);
    return insert;
}
int test_topologyinit() { int i, j; int ret = topology_init(); if (ret != 0) goto fail; CpuInfo_t cpuinfo = get_cpuInfo(); if (cpuinfo == NULL) goto fail; if (cpuinfo->family == 0) goto fail; if (cpuinfo->model == 0) goto fail; if (cpuinfo->osname == NULL) goto fail; if (cpuinfo->name == NULL) goto fail; if (cpuinfo->features == NULL) goto fail; CpuTopology_t cputopo = get_cpuTopology(); if (cputopo->threadPool == NULL) goto fail; if (cputopo->cacheLevels == NULL) goto fail; if (cputopo->numHWThreads == 0) goto fail; if (cputopo->activeHWThreads == 0) goto fail; if (cputopo->numSockets == 0) goto fail; if (cputopo->numCoresPerSocket < 1) goto fail; if (cputopo->numThreadsPerCore < 1) goto fail; if (cputopo->numHWThreads > 0) { for (i = 0; i < cputopo->numHWThreads; i++) { for (j=0;j< cputopo->numHWThreads; j++) { if ((i != j) && (cputopo->threadPool[i].apicId == cputopo->threadPool[j].apicId)) goto fail; } if (cputopo->threadPool[i].threadId >= cputopo->numThreadsPerCore) { goto fail; } if (cputopo->threadPool[i].packageId >= cputopo->numSockets) { goto fail; } } } if (cputopo->numCacheLevels > 0) { for (i=0;i<cputopo->numCacheLevels;i++) { if (cputopo->cacheLevels[i].level > cputopo->numCacheLevels) { goto fail; } } } isIntel = cpuinfo->isIntel; topology_finalize(); return 1; fail: topology_finalize(); return 0; }
int main(int argn, char** argc) { int err, i ,j; int numCPUs = 0; int gid; DATATYPE *a,*b,*c,*d; TimeData timer; double triad_time, copy_time, scale_time, stream_time; char estr[1024]; double result, scalar = 3.0; char* ptr; if (argn != 3) { printf("Usage: %s <cpustr> <events>\n", argc[0]); return 1; } strcpy(estr, argc[2]); allocate_vector(&a, SIZE); allocate_vector(&b, SIZE); allocate_vector(&c, SIZE); allocate_vector(&d, SIZE); err = topology_init(); if (err < 0) { printf("Failed to initialize LIKWID's topology module\n"); return 1; } CpuTopology_t topo = get_cpuTopology(); affinity_init(); int* cpus = (int*)malloc(topo->numHWThreads * sizeof(int)); if (!cpus) return 1; numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads); omp_set_num_threads(numCPUs); err = perfmon_init(numCPUs, cpus); if (err < 0) { printf("Failed to initialize LIKWID's performance monitoring module\n"); affinity_finalize(); topology_finalize(); return 1; } gid = perfmon_addEventSet(estr); if (gid < 0) { printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr); perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; } err = perfmon_setupCounters(gid); if (err < 0) { printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid); perfmon_finalize(); affinity_finalize(); topology_finalize(); return 1; } #ifdef _OPENMP printf(HLINE); #pragma omp parallel { #pragma omp master { printf ("Number of Threads requested = %i\n",omp_get_num_threads()); } likwid_pinThread(cpus[omp_get_thread_num()]); printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu()); } #endif #pragma omp parallel for for (int j=0; j<SIZE; j++) { a[j] = 1.0; b[j] = 2.0; c[j] = 0.0; d[j] = 1.0; } err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int 
k=0; k<ITER; k++) { LIKWID_MARKER_START("copy"); #pragma omp for for (int j=0; j<SIZE; j++) { c[j] = a[j]; } LIKWID_MARKER_STOP("copy"); } } time_stop(&timer); err = perfmon_stopCounters(); copy_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("scale"); #pragma omp for for (int j=0; j<SIZE; j++) { b[j] = scalar*c[j]; } LIKWID_MARKER_STOP("scale"); } } time_stop(&timer); err = perfmon_stopCounters(); scale_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { 
printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("stream"); #pragma omp for for (int j=0; j<SIZE; j++) { c[j] = a[j] + b[j]; } LIKWID_MARKER_STOP("stream"); } } time_stop(&timer); err = perfmon_stopCounters(); stream_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(2*SIZE*sizeof(DATATYPE)), copy_time, 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } strcpy(estr, argc[2]); perfmon_setupCounters(gid); err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } time_start(&timer); #pragma omp parallel { for (int k=0; k<ITER; k++) { LIKWID_MARKER_START("triad"); #pragma omp for for (int j=0; j<SIZE; j++) { a[j] = b[j] + c[j] * scalar; } LIKWID_MARKER_STOP("triad"); } } time_stop(&timer); err = perfmon_stopCounters(); triad_time = time_print(&timer)/(double)ITER; if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n", 1E-6*(4*SIZE*sizeof(DATATYPE)), triad_time, 1E-6*((4*SIZE*sizeof(DATATYPE))/triad_time)); ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < numCPUs; i++) { result = perfmon_getResult(gid, j, cpus[i]); 
printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } perfmon_finalize(); affinity_finalize(); topology_finalize(); return 0; }
int main(int argc, char* argv[]) { int i, j; int err; int* cpus; int gid; double result = 0.0; char estr[] = "INSTR_RETIRED_ANY:FIXC0,CPU_CLK_UNHALTED_CORE:FIXC1,CPU_CLK_UNHALTED_REF:FIXC2,TEMP_CORE:TMP0"; // Load the topology module and print some values. err = topology_init(); if (err < 0) { printf("Failed to initialize LIKWID's topology module\n"); return 1; } // CpuInfo_t contains global information like name, CPU family, ... CpuInfo_t info = get_cpuInfo(); // CpuTopology_t contains information about the topology of the CPUs. CpuTopology_t topo = get_cpuTopology(); // Create affinity domains. Commonly only needed when reading Uncore counters //affinity_init(); printf("Likwid example on a %s with %d CPUs\n", info->name, topo->numHWThreads); cpus = (int*)malloc(topo->numHWThreads * sizeof(int)); if (!cpus) return 1; for (i=0;i<topo->numHWThreads;i++) { cpus[i] = topo->threadPool[i].apicId; } // Must be called before perfmon_init() but only if you want to use another // access mode as the pre-configured one. For direct access (0) you have to // be root. //accessClient_setaccessmode(0); // Initialize the perfmon module. err = perfmon_init(topo->numHWThreads, cpus); if (err < 0) { printf("Failed to initialize LIKWID's performance monitoring module\n"); topology_finalize(); return 1; } // Add eventset string to the perfmon module. gid = perfmon_addEventSet(estr); if (gid < 0) { printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr); perfmon_finalize(); topology_finalize(); return 1; } // Setup the eventset identified by group ID (gid). err = perfmon_setupCounters(gid); if (err < 0) { printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid); perfmon_finalize(); topology_finalize(); return 1; } // Start all counters in the previously set up event set. 
err = perfmon_startCounters(); if (err < 0) { printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } // Perform something sleep(2); // Stop all counters in the previously started event set. err = perfmon_stopCounters(); if (err < 0) { printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1); perfmon_finalize(); topology_finalize(); return 1; } // Print the result of every thread/CPU for all events in estr. char* ptr = strtok(estr,","); j = 0; while (ptr != NULL) { for (i = 0;i < topo->numHWThreads; i++) { result = perfmon_getResult(gid, j, cpus[i]); printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result); } ptr = strtok(NULL,","); j++; } // Uninitialize the perfmon module. perfmon_finalize(); // Uninitialize the topology module. topology_finalize(); return 0; }