Esempio n. 1
0
/*
 * Lua binding that initializes the topology, NUMA and perfmon modules.
 *
 * Lua arguments:
 *   1: number of CPUs, must be greater than 0
 *   2: table with the CPU IDs stored at indices 1..nrThreads; it must be the
 *      topmost stack element
 *
 * Pushes exactly one result: the integer 0. This also holds when perfmon was
 * already initialized by an earlier call (previously nothing was pushed in
 * that case, so the CPU table itself was handed back to Lua).
 * Raises a Lua error if the table is missing or perfmon_init() fails.
 */
static int lua_likwid_init(lua_State* L)
{
    int ret;
    int nrThreads = luaL_checknumber(L,1);
    luaL_argcheck(L, nrThreads > 0, 1, "CPU count must be greater than 0");
    /* The CPU list lives on the C stack; its size comes from the Lua caller. */
    int cpus[nrThreads];
    if (!lua_istable(L, -1)) {
      lua_pushstring(L,"No table given as second argument");
      lua_error(L);
    }
    /* Copy the CPU IDs out of the Lua table (1-based) into the C array. */
    for (ret = 1; ret<=nrThreads; ret++)
    {
        lua_rawgeti(L,-1,ret);
        cpus[ret-1] = lua_tounsigned(L,-1);
        lua_pop(L,1);
    }
    if (topology_isInitialized == 0)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if ((topology_isInitialized) && (cpuinfo == NULL))
    {
        cpuinfo = get_cpuInfo();
    }
    if ((topology_isInitialized) && (cputopo == NULL))
    {
        cputopo = get_cpuTopology();
    }
    if (numa_isInitialized == 0)
    {
        /* The return value of numa_init() is not checked here. */
        numa_init();
        numa_isInitialized = 1;
        numainfo = get_numaTopology();
    }
    if ((numa_isInitialized) && (numainfo == NULL))
    {
        numainfo = get_numaTopology();
    }
    if (perfmon_isInitialized == 0)
    {
        ret = perfmon_init(nrThreads, &(cpus[0]));
        if (ret != 0)
        {
            lua_pushstring(L,"Cannot initialize likwid perfmon");
            lua_error(L);
            /* lua_error() does not return; kept to satisfy the compiler. */
            return 1;
        }
        perfmon_isInitialized = 1;
        timer_isInitialized = 1;
    }
    /* Every successful path reports the same integer status to Lua. */
    lua_pushinteger(L,0);
    return 1;
}
Esempio n. 2
0
/*
 * Example program: measures the event set EVENTSET on all hardware threads
 * for two seconds and prints one result per CPU.
 *
 * Returns 0 on success and 1 if memory allocation, perfmon initialization or
 * adding the event set fails. All acquired resources are released on every
 * exit path.
 */
int main(int argc, char* argv[])
{
    int i;
    int* cpus;
    int gid;
    double result = 0.0;

    // Load the topology module and print some values.
    topology_init();
    // CpuInfo_t contains global information like name, CPU family, ...
    CpuInfo_t info = get_cpuInfo();
    // CpuTopology_t contains information about the topology of the CPUs.
    CpuTopology_t topo = get_cpuTopology();
    printf("Likwid example on a %s with %d CPUs\n", info->name, topo->numHWThreads);

    cpus = malloc(topo->numHWThreads * sizeof(int));
    if (!cpus)
    {
        topology_finalize();
        return 1;
    }

    for (i=0;i<topo->numHWThreads;i++)
    {
        cpus[i] = topo->threadPool[i].apicId;
    }

    // Must be called before perfmon_init() but only if you want to use another
    // access mode as the pre-configured one. For direct access (0) you have to
    // be root.
    //accessClient_setaccessmode(0);

    // Initialize the perfmon module; a non-zero result means failure.
    if (perfmon_init(topo->numHWThreads, cpus) != 0)
    {
        fprintf(stderr, "Failed to initialize LIKWID's performance monitoring module\n");
        free(cpus);
        topology_finalize();
        return 1;
    }

    // Add eventset string to the perfmon module.
    // A negative group ID is treated as failure (see the LIKWID API docs).
    gid = perfmon_addEventSet(EVENTSET);
    if (gid < 0)
    {
        fprintf(stderr, "Failed to add event string %s to LIKWID's performance monitoring module\n", EVENTSET);
        perfmon_finalize();
        free(cpus);
        topology_finalize();
        return 1;
    }

    // Setup the eventset identified by group ID (gid).
    perfmon_setupCounters(gid);
    // Start all counters in the previously set up event set.
    perfmon_startCounters();
    // Perform something
    sleep(2);
    // Stop all counters in the previously started event set.
    perfmon_stopCounters();


    // Print the result of every thread/CPU.
    for (i = 0;i < topo->numHWThreads; i++)
    {
        result = perfmon_getResult(gid, 0, i);
        printf("Measurement result for event set %s at CPU %d: %f\n", EVENTSET, cpus[i], result);
    }

    // The CPU list is no longer needed once the results are printed.
    free(cpus);
    // Uninitialize the perfmon module.
    perfmon_finalize();
    // Uninitialize the topology module.
    topology_finalize();
    return 0;
}
Esempio n. 3
0
/*
 * Expands a "scatter" CPU expression into a CPU list that takes CPUs
 * round-robin from every affinity domain whose tag contains the first
 * character of the expression.
 *
 * bcpustr: CPU expression; must contain ':' to be handled here
 * cpulist: output array with room for at least `length` entries
 * length:  maximum number of CPUs to write
 *
 * Returns the number of CPUs written, 0 if the expression has no ':' , no
 * domain matches or length is not positive, and -ENOMEM if an allocation
 * fails.
 */
static int cpustr_to_cpulist_scatter(bstring bcpustr, int* cpulist, int length)
{
    int insert = 0;
    int suitidx = 0;
    size_t maxProcs = 0;
    int* suitable = NULL;
    int* sortedList = NULL;
    char* cpustring = NULL;

    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();

    /* Nothing can be written without domains, capacity or a ':' expression. */
    if (affinity == NULL || length <= 0 || bstrchrp(bcpustr, ':', 0) == BSTR_ERR)
    {
        return 0;
    }
    cpustring = bstr2cstr(bcpustr, '\0');
    if (!cpustring)
    {
        return -ENOMEM;
    }
    suitable = (int*)malloc(affinity->numberOfAffinityDomains*sizeof(int));
    if (!suitable)
    {
        insert = -ENOMEM;
        goto scatter_done;
    }
    /* Collect matching domains and remember the largest processor count. */
    for (int i=0; i<affinity->numberOfAffinityDomains; i++)
    {
        if (bstrchrp(affinity->domains[i].tag, cpustring[0], 0) != BSTR_ERR)
        {
            suitable[suitidx] = i;
            suitidx++;
            if ((size_t)affinity->domains[i].numberOfProcessors > maxProcs)
            {
                maxProcs = (size_t)affinity->domains[i].numberOfProcessors;
            }
        }
    }
    /* Without a match suitable[0] would be uninitialized; report zero CPUs. */
    if (suitidx == 0 || maxProcs == 0)
    {
        goto scatter_done;
    }
    /* Sized for the largest matching domain so every sort result fits. */
    sortedList = (int*) malloc(maxProcs * sizeof(int));
    if (!sortedList)
    {
        insert = -ENOMEM;
        goto scatter_done;
    }
    for (int off=0;off<affinity->domains[suitable[0]].numberOfProcessors;off++)
    {
        for(int i=0;i < suitidx; i++)
        {
            int sorted = cpulist_sort(affinity->domains[suitable[i]].processorList, sortedList, affinity->domains[suitable[i]].numberOfProcessors);
            /* Skip domains that have no entry at this offset instead of
             * reading a stale value left over from another domain. */
            if (off >= sorted)
            {
                continue;
            }
            cpulist[insert] = sortedList[off];
            insert++;
            if (insert == length)
                goto scatter_done;
        }
    }
scatter_done:
    free(sortedList);
    free(suitable);
    bcstrfree(cpustring);
    return insert;
}
Esempio n. 4
0
/*
 * Checks that the affinity module delivers a plausible set of domains after
 * the topology, NUMA and affinity modules have been initialized.
 *
 * Returns 1 if every check passes and 0 otherwise. The affinity and topology
 * modules are finalized on both outcomes.
 */
int test_affinityinit()
{
    int passed = 0;
    int d;

    topology_init();
    CpuTopology_t cputopo = get_cpuTopology();
    numa_init();
    affinity_init();
    AffinityDomains_t doms = get_affinityDomains();

    if (doms == NULL)
        goto cleanup;

    /* Global counters: socket domains must match the topology, all other
     * counts must be non-zero and processors-per-cache must not be smaller
     * than cores-per-cache. */
    if (doms->numberOfSocketDomains != cputopo->numSockets
        || doms->numberOfNumaDomains == 0
        || doms->numberOfProcessorsPerSocket == 0
        || doms->numberOfAffinityDomains == 0
        || doms->numberOfCacheDomains == 0
        || doms->numberOfCoresPerCache == 0
        || doms->numberOfProcessorsPerCache == 0
        || doms->numberOfProcessorsPerCache < doms->numberOfCoresPerCache
        || doms->domains == NULL)
    {
        goto cleanup;
    }

    /* Per-domain checks: non-empty, at least as many processors as cores,
     * and a processor list must be present. */
    for (d = 0; d < doms->numberOfAffinityDomains; d++)
    {
        if (doms->domains[d].numberOfProcessors == 0
            || doms->domains[d].numberOfCores == 0
            || doms->domains[d].numberOfProcessors < doms->domains[d].numberOfCores
            || doms->domains[d].processorList == NULL)
        {
            goto cleanup;
        }
    }
    passed = 1;

cleanup:
    affinity_finalize();
    topology_finalize();
    return passed;
}
Esempio n. 5
0
/*
 * Reorders a CPU list by stride: for every offset 0..numThreadsPerCore-1 it
 * copies the input entries at positions offset, offset + numThreadsPerCore,
 * offset + 2*numThreadsPerCore, ... to consecutive output slots.
 * (Presumably the input keeps the hardware threads of one core adjacent, so
 * the output lists one thread of each core first -- TODO confirm.)
 *
 * incpus:  input list with `length` entries
 * outcpus: output list; receives the reordered entries
 * length:  number of input entries
 *
 * Returns the number of entries written, or -1 if length is not positive.
 * Entries beyond the last full group of numThreadsPerCore are not copied.
 */
static int cpulist_sort(int* incpus, int* outcpus, int length)
{
    int written = 0;
    int sibling = 0;

    topology_init();
    CpuTopology_t cpuid_topology = get_cpuTopology();
    if (!(length > 0))
    {
        return -1;
    }

    while (sibling < cpuid_topology->numThreadsPerCore)
    {
        int group = 0;
        /* The division stays inside the loop: it is only evaluated when
         * numThreadsPerCore is non-zero. */
        while (group < length/cpuid_topology->numThreadsPerCore)
        {
            outcpus[written] = incpus[(group*cpuid_topology->numThreadsPerCore)+sibling];
            written++;
            group++;
        }
        sibling++;
    }
    return written;
}
Esempio n. 6
0
/*
 * Converts a comma-separated list of domain indices into an integer list,
 * keeping only those indices for which an affinity domain with the tag
 * <prefix><index> exists.
 *
 * bcpustr: comma-separated indices
 * prefix:  string prepended to every index to form the domain tag
 * list:    output array with room for at least `length` entries
 * length:  maximum number of entries to write
 *
 * Returns the number of entries written; 0 if length is not positive or the
 * input cannot be split. Prints a message to stderr for every index without
 * a matching domain.
 */
static int cpuexpr_to_list(bstring bcpustr, bstring prefix, int* list, int length)
{
    int insert = 0;
    struct bstrList* strlist = NULL;

    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();

    if (length <= 0)
    {
        return 0;
    }
    /* bsplit() allocates the list itself; no bstrListCreate() is needed. */
    strlist = bsplit(bcpustr, ',');
    if (strlist == NULL)
    {
        return 0;
    }
    /* Stop as soon as the output list is full. */
    for (int i=0; i < strlist->qty && insert < length; i++)
    {
        int found = 0;
        bstring newstr = bstrcpy(prefix);
        bconcat(newstr, strlist->entry[i]);
        for (int j = 0; j < affinity->numberOfAffinityDomains; j++)
        {
            if (bstrcmp(affinity->domains[j].tag, newstr) == 0)
            {
                list[insert] = atoi(bdata(strlist->entry[i]));
                insert++;
                found = 1;
                break;
            }
        }
        if (!found)
        {
            fprintf(stderr,"Domain %s cannot be found\n", bdata(newstr));
        }
        /* Released on every iteration, including the one that fills the list. */
        bdestroy(newstr);
    }
    bstrListDestroy(strlist);
    return insert;
}
Esempio n. 7
0
/*
 * Lua binding that returns the affinity domains as one Lua table.
 *
 * The table holds the global counters (numberOfAffinityDomains,
 * numberOfSocketDomains, numberOfNumaDomains, numberOfProcessorsPerSocket,
 * numberOfCacheDomains, numberOfCoresPerCache, numberOfProcessorsPerCache)
 * and a 1-based array "domains" whose entries carry tag, numberOfProcessors,
 * numberOfCores and a 1-based "processorList".
 *
 * Initializes the topology, NUMA and affinity modules on first use. Raises a
 * Lua error if no affinity information is available. Pushes one value.
 */
static int lua_likwid_getAffinityInfo(lua_State* L)
{
    int dom;
    int proc;

    /* Topology module: initialize once and cache both handles. */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if (topology_isInitialized && !cpuinfo)
    {
        cpuinfo = get_cpuInfo();
    }
    if (topology_isInitialized && !cputopo)
    {
        cputopo = get_cpuTopology();
    }

    /* NUMA module: only marked as initialized when numa_init() returns 0. */
    if (!numa_isInitialized)
    {
        if (numa_init() == 0)
        {
            numa_isInitialized = 1;
            numainfo = get_numaTopology();
        }
    }
    if (numa_isInitialized && !numainfo)
    {
        numainfo = get_numaTopology();
    }

    /* Affinity module. */
    if (!affinity_isInitialized)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    if (affinity_isInitialized && !affinity)
    {
        affinity = get_affinityDomains();
    }

    if (affinity == NULL)
    {
        lua_pushstring(L, "Cannot initialize affinity groups");
        lua_error(L);
    }

    /* Result table with the global counters. */
    lua_newtable(L);
    lua_pushstring(L, "numberOfAffinityDomains");
    lua_pushunsigned(L, affinity->numberOfAffinityDomains);
    lua_settable(L, -3);
    lua_pushstring(L, "numberOfSocketDomains");
    lua_pushunsigned(L, affinity->numberOfSocketDomains);
    lua_settable(L, -3);
    lua_pushstring(L, "numberOfNumaDomains");
    lua_pushunsigned(L, affinity->numberOfNumaDomains);
    lua_settable(L, -3);
    lua_pushstring(L, "numberOfProcessorsPerSocket");
    lua_pushunsigned(L, affinity->numberOfProcessorsPerSocket);
    lua_settable(L, -3);
    lua_pushstring(L, "numberOfCacheDomains");
    lua_pushunsigned(L, affinity->numberOfCacheDomains);
    lua_settable(L, -3);
    lua_pushstring(L, "numberOfCoresPerCache");
    lua_pushunsigned(L, affinity->numberOfCoresPerCache);
    lua_settable(L, -3);
    lua_pushstring(L, "numberOfProcessorsPerCache");
    lua_pushunsigned(L, affinity->numberOfProcessorsPerCache);
    lua_settable(L, -3);

    /* "domains": one sub-table per affinity domain, indexed from 1. */
    lua_pushstring(L, "domains");
    lua_newtable(L);
    dom = 0;
    while (dom < affinity->numberOfAffinityDomains)
    {
        lua_pushunsigned(L, dom + 1);
        lua_newtable(L);
        lua_pushstring(L, "tag");
        lua_pushstring(L, bdata(affinity->domains[dom].tag));
        lua_settable(L, -3);
        lua_pushstring(L, "numberOfProcessors");
        lua_pushunsigned(L, affinity->domains[dom].numberOfProcessors);
        lua_settable(L, -3);
        lua_pushstring(L, "numberOfCores");
        lua_pushunsigned(L, affinity->domains[dom].numberOfCores);
        lua_settable(L, -3);

        lua_pushstring(L, "processorList");
        lua_newtable(L);
        proc = 0;
        while (proc < affinity->domains[dom].numberOfProcessors)
        {
            lua_pushunsigned(L, proc + 1);
            lua_pushunsigned(L, affinity->domains[dom].processorList[proc]);
            lua_settable(L, -3);
            proc++;
        }
        lua_settable(L, -3);    /* entry.processorList = list */
        lua_settable(L, -3);    /* domains[dom + 1] = entry */
        dom++;
    }
    lua_settable(L, -3);        /* result.domains = domains */
    return 1;
}
Esempio n. 8
0
/*
 * Lua binding that returns the NUMA topology as one Lua table.
 *
 * The table holds "numberOfNodes" and a 1-based array "nodes". Every node
 * entry carries id, totalMemory, freeMemory, numberOfProcessors,
 * numberOfDistances, a 1-based "processors" array and a 1-based "distances"
 * array whose element j+1 is a table mapping the key j to the distance value.
 *
 * If numa_init() fails, a table with numberOfNodes = 0 and an empty "nodes"
 * table is returned instead. Initializes the topology, NUMA and affinity
 * modules on first use. Pushes one value.
 */
static int lua_likwid_getNumaInfo(lua_State* L)
{
    uint32_t node;
    uint32_t k;

    /* Topology module: initialize once and cache both handles. */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if (topology_isInitialized && !cpuinfo)
    {
        cpuinfo = get_cpuInfo();
    }
    if (topology_isInitialized && !cputopo)
    {
        cputopo = get_cpuTopology();
    }

    if (!numa_isInitialized)
    {
        if (numa_init() != 0)
        {
            /* No NUMA information: hand back an empty description. */
            lua_newtable(L);
            lua_pushstring(L, "numberOfNodes");
            lua_pushunsigned(L, 0);
            lua_settable(L, -3);
            lua_pushstring(L, "nodes");
            lua_newtable(L);
            lua_settable(L, -3);
            return 1;
        }
        numa_isInitialized = 1;
        numainfo = get_numaTopology();
    }
    if (numa_isInitialized && !numainfo)
    {
        numainfo = get_numaTopology();
    }

    /* Affinity module. */
    if (!affinity_isInitialized)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    if (affinity_isInitialized && !affinity)
    {
        affinity = get_affinityDomains();
    }

    lua_newtable(L);
    lua_pushstring(L, "numberOfNodes");
    lua_pushunsigned(L, numainfo->numberOfNodes);
    lua_settable(L, -3);

    lua_pushstring(L, "nodes");
    lua_newtable(L);
    node = 0;
    while (node < numainfo->numberOfNodes)
    {
        lua_pushinteger(L, node + 1);
        lua_newtable(L);

        /* Scalar attributes of the node. */
        lua_pushstring(L, "id");
        lua_pushunsigned(L, numainfo->nodes[node].id);
        lua_settable(L, -3);
        lua_pushstring(L, "totalMemory");
        lua_pushunsigned(L, numainfo->nodes[node].totalMemory);
        lua_settable(L, -3);
        lua_pushstring(L, "freeMemory");
        lua_pushunsigned(L, numainfo->nodes[node].freeMemory);
        lua_settable(L, -3);
        lua_pushstring(L, "numberOfProcessors");
        lua_pushunsigned(L, numainfo->nodes[node].numberOfProcessors);
        lua_settable(L, -3);
        lua_pushstring(L, "numberOfDistances");
        lua_pushunsigned(L, numainfo->nodes[node].numberOfDistances);
        lua_settable(L, -3);

        /* "processors": 1-based array of the node's processor IDs. */
        lua_pushstring(L, "processors");
        lua_newtable(L);
        k = 0;
        while (k < numainfo->nodes[node].numberOfProcessors)
        {
            lua_pushunsigned(L, k + 1);
            lua_pushunsigned(L, numainfo->nodes[node].processors[k]);
            lua_settable(L, -3);
            k++;
        }
        lua_settable(L, -3);

        /* "distances": element k+1 is a one-entry table { [k] = distance }. */
        lua_pushstring(L, "distances");
        lua_newtable(L);
        k = 0;
        while (k < numainfo->nodes[node].numberOfDistances)
        {
            lua_pushinteger(L, k + 1);
            lua_newtable(L);
            lua_pushinteger(L, k);
            lua_pushunsigned(L, numainfo->nodes[node].distances[k]);
            lua_settable(L, -3);
            lua_settable(L, -3);
            k++;
        }
        lua_settable(L, -3);

        lua_settable(L, -3);    /* nodes[node + 1] = entry */
        node++;
    }
    lua_settable(L, -3);        /* result.nodes = nodes */
    return 1;
}
Esempio n. 9
0
/*
 * Lua binding that returns the CPU topology as one Lua table.
 *
 * The table holds the scalar counters (numHWThreads, activeHWThreads,
 * numSockets, numCoresPerSocket, numThreadsPerCore, numCacheLevels) and three
 * sub-tables:
 *   threadPool   - indexed from 0, one entry per hardware thread
 *   cacheLevels  - indexed from 1, one entry per cache level
 *   topologyTree - indexed from 0: sockets -> "Childs" (cores) -> "Childs"
 *                  (hardware thread IDs)
 *
 * Initializes the topology and NUMA modules on first use. Pushes one value.
 */
static int lua_likwid_getCpuTopology(lua_State* L)
{
    int idx;
    TreeNode* socketNode;
    TreeNode* coreNode;
    TreeNode* threadNode;
    int socketCount = 0;
    int coreCount = 0;
    int threadCount = 0;

    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cputopo = get_cpuTopology();
    }
    if (topology_isInitialized && !cputopo)
    {
        cputopo = get_cpuTopology();
    }

    /* NUMA module: only marked as initialized when numa_init() returns 0. */
    if (!numa_isInitialized)
    {
        if (numa_init() == 0)
        {
            numa_isInitialized = 1;
            numainfo = get_numaTopology();
        }
    }
    if (numa_isInitialized && !numainfo)
    {
        numainfo = get_numaTopology();
    }

    lua_newtable(L);

    /* Scalar counters. */
    lua_pushstring(L, "numHWThreads");
    lua_pushunsigned(L, cputopo->numHWThreads);
    lua_settable(L, -3);

    lua_pushstring(L, "activeHWThreads");
    lua_pushunsigned(L, cputopo->activeHWThreads);
    lua_settable(L, -3);

    lua_pushstring(L, "numSockets");
    lua_pushunsigned(L, cputopo->numSockets);
    lua_settable(L, -3);

    lua_pushstring(L, "numCoresPerSocket");
    lua_pushunsigned(L, cputopo->numCoresPerSocket);
    lua_settable(L, -3);

    lua_pushstring(L, "numThreadsPerCore");
    lua_pushunsigned(L, cputopo->numThreadsPerCore);
    lua_settable(L, -3);

    lua_pushstring(L, "numCacheLevels");
    lua_pushinteger(L, cputopo->numCacheLevels);
    lua_settable(L, -3);

    /* "threadPool": keys start at 0. */
    lua_pushstring(L, "threadPool");
    lua_newtable(L);
    idx = 0;
    while (idx < cputopo->numHWThreads)
    {
        lua_pushnumber(L, idx);
        lua_newtable(L);
        lua_pushstring(L, "threadId");
        lua_pushunsigned(L, cputopo->threadPool[idx].threadId);
        lua_settable(L, -3);
        lua_pushstring(L, "coreId");
        lua_pushunsigned(L, cputopo->threadPool[idx].coreId);
        lua_settable(L, -3);
        lua_pushstring(L, "packageId");
        lua_pushunsigned(L, cputopo->threadPool[idx].packageId);
        lua_settable(L, -3);
        lua_pushstring(L, "apicId");
        lua_pushunsigned(L, cputopo->threadPool[idx].apicId);
        lua_settable(L, -3);
        lua_pushstring(L, "inCpuSet");
        lua_pushunsigned(L, cputopo->threadPool[idx].inCpuSet);
        lua_settable(L, -3);
        lua_settable(L, -3);    /* threadPool[idx] = entry */
        idx++;
    }
    lua_settable(L, -3);

    /* "cacheLevels": keys start at 1. */
    lua_pushstring(L, "cacheLevels");
    lua_newtable(L);
    idx = 0;
    while (idx < cputopo->numCacheLevels)
    {
        const char* typeName;

        lua_pushnumber(L, idx + 1);
        lua_newtable(L);

        lua_pushstring(L, "level");
        lua_pushunsigned(L, cputopo->cacheLevels[idx].level);
        lua_settable(L, -3);

        lua_pushstring(L, "associativity");
        lua_pushunsigned(L, cputopo->cacheLevels[idx].associativity);
        lua_settable(L, -3);

        lua_pushstring(L, "sets");
        lua_pushunsigned(L, cputopo->cacheLevels[idx].sets);
        lua_settable(L, -3);

        lua_pushstring(L, "lineSize");
        lua_pushunsigned(L, cputopo->cacheLevels[idx].lineSize);
        lua_settable(L, -3);

        lua_pushstring(L, "size");
        lua_pushunsigned(L, cputopo->cacheLevels[idx].size);
        lua_settable(L, -3);

        lua_pushstring(L, "threads");
        lua_pushunsigned(L, cputopo->cacheLevels[idx].threads);
        lua_settable(L, -3);

        lua_pushstring(L, "inclusive");
        lua_pushunsigned(L, cputopo->cacheLevels[idx].inclusive);
        lua_settable(L, -3);

        /* The cache type is exported by name; unknown values map to NOCACHE. */
        lua_pushstring(L, "type");
        switch (cputopo->cacheLevels[idx].type)
        {
            case DATACACHE:
                typeName = "DATACACHE";
                break;
            case INSTRUCTIONCACHE:
                typeName = "INSTRUCTIONCACHE";
                break;
            case UNIFIEDCACHE:
                typeName = "UNIFIEDCACHE";
                break;
            case ITLB:
                typeName = "ITLB";
                break;
            case DTLB:
                typeName = "DTLB";
                break;
            case NOCACHE:
            default:
                typeName = "NOCACHE";
                break;
        }
        lua_pushstring(L, typeName);
        lua_settable(L, -3);
        lua_settable(L, -3);    /* cacheLevels[idx + 1] = entry */
        idx++;
    }
    lua_settable(L, -3);

    /* "topologyTree": sockets, cores and threads, every level keyed from 0. */
    lua_pushstring(L, "topologyTree");
    lua_newtable(L);

    for (socketNode = tree_getChildNode(cputopo->topologyTree);
         socketNode != NULL;
         socketNode = tree_getNextNode(socketNode), socketCount++)
    {
        lua_pushinteger(L, socketCount);
        lua_newtable(L);
        lua_pushstring(L, "ID");
        lua_pushunsigned(L, socketNode->id);
        lua_settable(L, -3);
        lua_pushstring(L, "Childs");
        lua_newtable(L);

        coreCount = 0;
        for (coreNode = tree_getChildNode(socketNode);
             coreNode != NULL;
             coreNode = tree_getNextNode(coreNode), coreCount++)
        {
            lua_pushinteger(L, coreCount);
            lua_newtable(L);
            lua_pushstring(L, "ID");
            lua_pushunsigned(L, coreNode->id);
            lua_settable(L, -3);
            lua_pushstring(L, "Childs");
            lua_newtable(L);

            threadNode = tree_getChildNode(coreNode);
            threadCount = 0;
            for (; threadNode != NULL; threadNode = tree_getNextNode(threadNode), threadCount++)
            {
                lua_pushunsigned(L, threadCount);
                lua_pushunsigned(L, threadNode->id);
                lua_settable(L, -3);
            }
            lua_settable(L, -3);    /* core.Childs = threads */
            lua_settable(L, -3);    /* cores[coreCount] = core */
        }
        lua_settable(L, -3);        /* socket.Childs = cores */
        lua_settable(L, -3);        /* tree[socketCount] = socket */
    }
    lua_settable(L, -3);            /* result.topologyTree = tree */
    return 1;
}
Esempio n. 10
0
/*
 * Lua binding that returns the power (RAPL) information as one Lua table.
 *
 * The table holds hasRAPL, baseFrequency, minFrequency, powerUnit, timeUnit,
 * a "turbo" table (numSteps plus a 1-based "steps" array) and a "domains"
 * table keyed by the names in power_names. Every domain entry carries ID,
 * energyUnit and the boolean flags supportStatus, supportPerf, supportPolicy,
 * supportLimit and supportInfo; tdp, minPower, maxPower and maxTimeWindow are
 * only present when supportInfo is true.
 *
 * Initializes the topology and power modules on first use. Returns no value
 * to Lua if power_init() reports that RAPL is not available, otherwise one.
 */
static int lua_likwid_getPowerInfo(lua_State* L)
{
    int step;
    int dom;

    /* Topology module: initialize once and cache both handles. */
    if (!topology_isInitialized)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if (topology_isInitialized && !cpuinfo)
    {
        cpuinfo = get_cpuInfo();
    }
    if (topology_isInitialized && !cputopo)
    {
        cputopo = get_cpuTopology();
    }

    if (!power_isInitialized)
    {
        power_hasRAPL = power_init(0);
        if (!power_hasRAPL)
        {
            return 0;
        }
        power_isInitialized = 1;
        power = get_powerInfo();
    }

    /* Scalar attributes. */
    lua_newtable(L);
    lua_pushstring(L, "hasRAPL");
    lua_pushboolean(L, power_hasRAPL);
    lua_settable(L, -3);
    lua_pushstring(L, "baseFrequency");
    lua_pushnumber(L, power->baseFrequency);
    lua_settable(L, -3);
    lua_pushstring(L, "minFrequency");
    lua_pushnumber(L, power->minFrequency);
    lua_settable(L, -3);
    lua_pushstring(L, "powerUnit");
    lua_pushnumber(L, power->powerUnit);
    lua_settable(L, -3);
    lua_pushstring(L, "timeUnit");
    lua_pushnumber(L, power->timeUnit);
    lua_settable(L, -3);

    /* "turbo": step count plus the 1-based list of step values. */
    lua_pushstring(L, "turbo");
    lua_newtable(L);
    lua_pushstring(L, "numSteps");
    lua_pushunsigned(L, power->turbo.numSteps);
    lua_settable(L, -3);
    lua_pushstring(L, "steps");
    lua_newtable(L);
    step = 0;
    while (step < power->turbo.numSteps)
    {
        lua_pushunsigned(L, step + 1);
        lua_pushnumber(L, power->turbo.steps[step]);
        lua_settable(L, -3);
        step++;
    }
    lua_settable(L, -3);    /* turbo.steps = steps */
    lua_settable(L, -3);    /* result.turbo = turbo */

    /* "domains": one entry per power domain, keyed by its name. */
    lua_pushstring(L, "domains");
    lua_newtable(L);
    for (dom = 0; dom < NUM_POWER_DOMAINS; dom++)
    {
        int hasInfo;

        lua_pushstring(L, power_names[dom]);
        lua_newtable(L);

        lua_pushstring(L, "ID");
        lua_pushnumber(L, power->domains[dom].type);
        lua_settable(L, -3);
        lua_pushstring(L, "energyUnit");
        lua_pushnumber(L, power->domains[dom].energyUnit);
        lua_settable(L, -3);

        /* Capability flags are exported as plain booleans. */
        lua_pushstring(L, "supportStatus");
        lua_pushboolean(L, (power->domains[dom].supportFlags & POWER_DOMAIN_SUPPORT_STATUS) ? 1 : 0);
        lua_settable(L, -3);
        lua_pushstring(L, "supportPerf");
        lua_pushboolean(L, (power->domains[dom].supportFlags & POWER_DOMAIN_SUPPORT_PERF) ? 1 : 0);
        lua_settable(L, -3);
        lua_pushstring(L, "supportPolicy");
        lua_pushboolean(L, (power->domains[dom].supportFlags & POWER_DOMAIN_SUPPORT_POLICY) ? 1 : 0);
        lua_settable(L, -3);
        lua_pushstring(L, "supportLimit");
        lua_pushboolean(L, (power->domains[dom].supportFlags & POWER_DOMAIN_SUPPORT_LIMIT) ? 1 : 0);
        lua_settable(L, -3);

        hasInfo = (power->domains[dom].supportFlags & POWER_DOMAIN_SUPPORT_INFO) ? 1 : 0;
        lua_pushstring(L, "supportInfo");
        lua_pushboolean(L, hasInfo);
        lua_settable(L, -3);
        if (hasInfo)
        {
            /* Limits are only exported for domains that provide them. */
            lua_pushstring(L, "tdp");
            lua_pushnumber(L, power->domains[dom].tdp);
            lua_settable(L, -3);
            lua_pushstring(L, "minPower");
            lua_pushnumber(L, power->domains[dom].minPower);
            lua_settable(L, -3);
            lua_pushstring(L, "maxPower");
            lua_pushnumber(L, power->domains[dom].maxPower);
            lua_settable(L, -3);
            lua_pushstring(L, "maxTimeWindow");
            lua_pushnumber(L, power->domains[dom].maxTimeWindow);
            lua_settable(L, -3);
        }

        lua_settable(L, -3);    /* domains[name] = entry */
    }
    lua_settable(L, -3);        /* result.domains = domains */

    return 1;
}
Esempio n. 11
0
/*
 * Parses a workgroup string and fills `group` accordingly.
 *
 * Accepted forms (fields separated by ':'):
 *   <domain>:<size>                          all active hardware threads
 *   <domain>:<size>:<nrThreads>
 *   <domain>:<size>:<nrThreads>:<f3>:<f4>    the last two fields are passed
 *                                            through into the CPU expression
 *
 * group: receives size, processorIds (malloc'ed, owned by the caller on
 *        success) and numberOfThreads
 * str:   workgroup string
 * type:  data type handed to bstr_to_doubleSize() for the size conversion
 *
 * Returns a newly allocated copy of the domain field on success, NULL on any
 * error (a message is printed to stderr). On error no memory stays allocated
 * and group->processorIds is not left pointing at freed memory.
 */
bstring parse_workgroup(Workgroup* group, const_bstring str, DataType type)
{
    CpuTopology_t topo;
    struct bstrList* tokens;
    bstring cpustr = NULL;
    bstring domain = NULL;
    int numThreads = 0;


    tokens = bsplit(str,':');
    if (tokens == NULL)
    {
        fprintf(stderr, "Misformated workgroup string\n");
        return NULL;
    }
    if (tokens->qty == 2)
    {
        topo = get_cpuTopology();
        numThreads = topo->activeHWThreads;
        cpustr = bformat("E:%s:%d", bdata(tokens->entry[0]), numThreads );
    }
    else if ((tokens->qty == 3) || (tokens->qty == 5))
    {
        /* Validate the thread count before building the CPU expression so a
         * bad number does not leave an allocated string behind. */
        numThreads = str2int(bdata(tokens->entry[2]));
        if (numThreads < 0)
        {
            fprintf(stderr, "Cannot convert %s to integer\n", bdata(tokens->entry[2]));
            goto cleanup;
        }
        if (tokens->qty == 3)
        {
            cpustr = bformat("E:%s:%s", bdata(tokens->entry[0]), bdata(tokens->entry[2]));
        }
        else
        {
            cpustr = bformat("E:%s:%s:%s:%s", bdata(tokens->entry[0]),
                                              bdata(tokens->entry[2]),
                                              bdata(tokens->entry[3]),
                                              bdata(tokens->entry[4]));
        }
    }
    else
    {
        fprintf(stderr, "Misformated workgroup string\n");
        goto cleanup;
    }
    if (cpustr == NULL)
    {
        fprintf(stderr, "Cannot build CPU expression for workgroup\n");
        goto cleanup;
    }

    group->size = bstr_to_doubleSize(tokens->entry[1], type);
    if (group->size == 0)
    {
        fprintf(stderr, "Stream size cannot be read, should look like <domain>:<size>\n");
        goto cleanup;
    }
    group->processorIds = (int*) malloc(numThreads * sizeof(int));
    if (group->processorIds == NULL)
    {
        fprintf(stderr, "No more memory to allocate list of processors\n");
        goto cleanup;
    }
    group->numberOfThreads = numThreads;
    if (cpustr_to_cpulist(bdata(cpustr),group->processorIds, numThreads) < 0 )
    {
        free(group->processorIds);
        /* Do not leave a dangling pointer or a stale count in the group. */
        group->processorIds = NULL;
        group->numberOfThreads = 0;
        goto cleanup;
    }
    domain = bstrcpy(tokens->entry[0]);

cleanup:
    /* Shared exit: domain is still NULL on every error path. */
    if (cpustr != NULL)
    {
        bdestroy(cpustr);
    }
    bstrListDestroy(tokens);
    return domain;
}
Esempio n. 12
0
/*
 * Converts a CPU string into a list of CPU IDs.
 *
 * The string is split at '@'; every part is dispatched by its form:
 *   contains "scatter"            -> scatter parser
 *   starts with 'E'               -> expression parser
 *   starts with 'L'               -> logical parser
 *   inside a cpuset (fewer active than existing hardware threads)
 *                                 -> logical parser, prefixed with "L:" if the
 *                                    part starts with N/S/C/M and contains
 *                                    ':', otherwise with "L:N:"
 *   starts with N/S/C/M and contains ':' -> logical parser, prefixed "L:"
 *   anything else                 -> physical parser
 *
 * cpustring: CPU string to parse
 * cpulist:   output array with room for at least `length` entries
 * length:    capacity of cpulist
 *
 * Returns the number of CPUs written (never more than `length`), -EINVAL for
 * a NULL argument or non-positive length, and -ENOMEM if an allocation fails.
 * Errors reported by the individual parsers are not propagated; the affected
 * part simply contributes no CPUs.
 */
int cpustr_to_cpulist(char* cpustring, int* cpulist, int length)
{
    int insert = 0;
    int ret = 0;
    bstring bcpustr = NULL;
    bstring scattercheck = NULL;
    struct bstrList* strlist = NULL;
    int* tmpList = NULL;

    if ((cpustring == NULL) || (cpulist == NULL) || (length <= 0))
    {
        return -EINVAL;
    }

    topology_init();
    CpuTopology_t cpuid_topology = get_cpuTopology();

    bcpustr = bfromcstr(cpustring);
    scattercheck = bformat("scatter");
    tmpList = (int*)malloc(length * sizeof(int));
    if ((bcpustr == NULL) || (scattercheck == NULL) || (tmpList == NULL))
    {
        insert = -ENOMEM;
        goto cleanup;
    }
    /* bsplit() allocates the list itself; no bstrListCreate() is needed. */
    strlist = bsplit(bcpustr, '@');
    if (strlist == NULL)
    {
        insert = -ENOMEM;
        goto cleanup;
    }

    /* Stop once cpulist is full; each parser only gets the remaining room so
     * several '@' parts together cannot write past the end of cpulist. */
    for (int i=0; (i < strlist->qty) && (insert < length); i++)
    {
        bstring entry = strlist->entry[i];
        int remaining = length - insert;

        if (binstr(entry, 0, scattercheck) != BSTR_ERR)
        {
            ret = cpustr_to_cpulist_scatter(entry, tmpList, remaining);
        }
        else if (bstrchrp(entry, 'E', 0) == 0)
        {
            ret = cpustr_to_cpulist_expression(entry, tmpList, remaining);
        }
        else if (bstrchrp(entry, 'L', 0) == 0)
        {
            ret = cpustr_to_cpulist_logical(entry, tmpList, remaining);
        }
        else
        {
            /* Domain-prefixed index list such as "S0:0-3". */
            int hasDomainPrefix = (((bstrchrp(entry, 'N', 0) == 0) ||
                                    (bstrchrp(entry, 'S', 0) == 0) ||
                                    (bstrchrp(entry, 'C', 0) == 0) ||
                                    (bstrchrp(entry, 'M', 0) == 0)) &&
                                   (bstrchrp(entry, ':', 0) != BSTR_ERR));
            int inCpuset = (cpuid_topology->activeHWThreads < cpuid_topology->numHWThreads);

            if (inCpuset)
            {
                fprintf(stdout, "INFO: You are running LIKWID in a cpuset with %d CPUs, only logical numbering allowed\n", cpuid_topology->activeHWThreads);
            }
            if (inCpuset || hasDomainPrefix)
            {
                bstring newstr = bfromcstr(hasDomainPrefix ? "L:" : "L:N:");
                bconcat(newstr, entry);
                ret = cpustr_to_cpulist_logical(newstr, tmpList, remaining);
                bdestroy(newstr);
            }
            else
            {
                ret = cpustr_to_cpulist_physical(entry, tmpList, remaining);
            }
        }
        /* Only positive counts are appended; parser errors add nothing. */
        if (ret > 0)
        {
            insert += cpulist_concat(cpulist, insert, tmpList, ret);
        }
    }

cleanup:
    free(tmpList);
    if (strlist != NULL)
    {
        bstrListDestroy(strlist);
    }
    if (scattercheck != NULL)
    {
        bdestroy(scattercheck);
    }
    if (bcpustr != NULL)
    {
        bdestroy(bcpustr);
    }
    return insert;
}
Esempio n. 13
0
/*
 * Parses a CPU string with physical CPU IDs into a CPU list.
 *
 * Accepted forms: "<domain>:<list>" or just "<list>" (domain "N" is used
 * then). <list> is a comma-separated sequence of single IDs and ranges
 * "a-b"; a range with a > b is walked downwards. Only CPUs that
 * cpu_in_domain() reports as members of the domain are stored; for all
 * others a message is printed to stderr.
 *
 * bcpustr: CPU string
 * cpulist: output array with room for at least `length` entries
 * length:  maximum number of CPUs to write
 *
 * Returns the number of CPUs written; 0 if length is not positive, the
 * domain is unknown or the string cannot be split.
 */
static int cpustr_to_cpulist_physical(bstring bcpustr, int* cpulist, int length)
{
    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    bstring bdomain;
    bstring blist;
    struct bstrList* strlist;
    int domainidx = -1;
    int insert = 0;

    if (length <= 0)
    {
        return 0;
    }
    if (bstrchrp(bcpustr, ':', 0) != BSTR_ERR)
    {
        /* bsplit() allocates the list itself; the ':' found above guarantees
         * at least two entries. */
        strlist = bsplit(bcpustr, ':');
        if (strlist == NULL)
        {
            return 0;
        }
        bdomain = bstrcpy(strlist->entry[0]);
        blist = bstrcpy(strlist->entry[1]);
        bstrListDestroy(strlist);
    }
    else
    {
        bdomain = bformat("N");
        blist = bstrcpy(bcpustr);
    }
    for (int i=0; i<affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(bdomain, affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
        bdestroy(bdomain);
        bdestroy(blist);
        return 0;
    }
    strlist = bsplit(blist, ',');
    if (strlist == NULL)
    {
        bdestroy(bdomain);
        bdestroy(blist);
        return 0;
    }
    /* Both loops stop as soon as the output list is full. */
    for (int i=0; (i < strlist->qty) && (insert < length); i++)
    {
        if (bstrchrp(strlist->entry[i], '-', 0) != BSTR_ERR)
        {
            struct bstrList* indexlist = bsplit(strlist->entry[i], '-');
            if (indexlist == NULL)
            {
                continue;
            }
            /* Convert the range bounds once instead of on every iteration. */
            int first = atoi(bdata(indexlist->entry[0]));
            int last = atoi(bdata(indexlist->entry[1]));
            int step = (first <= last) ? 1 : -1;
            for (int j = first; insert < length; j += step)
            {
                if (cpu_in_domain(domainidx, j))
                {
                    cpulist[insert] = j;
                    insert++;
                }
                else
                {
                    fprintf(stderr, "CPU %d not in domain %s\n", j, bdata(affinity->domains[domainidx].tag));
                }
                /* Leave before stepping past the inclusive end of the range. */
                if (j == last)
                {
                    break;
                }
            }
            bstrListDestroy(indexlist);
        }
        else
        {
            int cpu = atoi(bdata(strlist->entry[i]));
            if (cpu_in_domain(domainidx, cpu))
            {
                cpulist[insert] = cpu;
                insert++;
            }
            else
            {
                fprintf(stderr, "CPU %d not in domain %s\n", cpu, bdata(affinity->domains[domainidx].tag));
            }
        }
    }
    bstrListDestroy(strlist);
    bdestroy(bdomain);
    bdestroy(blist);
    return insert;
}
Esempio n. 14
0
/*
 * Translate a logical CPU expression of the form L:<domain>:<indexlist> into
 * CPU IDs.
 *
 * <indexlist> is a comma-separated list of indices or index ranges (a-b,
 * ascending or descending). Each index selects a position in the domain's
 * processor list after that list has been passed through cpulist_sort().
 *
 * bcpustr  expression to parse; must start with 'L'
 * cpulist  output array receiving the selected CPU IDs
 * length   capacity of cpulist; parsing stops as soon as it is full
 *
 * Returns the number of entries written to cpulist, 0 for an invalid
 * expression, unknown domain or unusable output buffer, and -ENOMEM if the
 * temporary sorted list cannot be allocated. Indices outside the domain are
 * reported on stderr and skipped.
 */
static int cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
{
    int status = 0;
    int insert = 0;
    int domainidx = -1;
    int nprocs = 0;
    int* inlist = NULL;
    struct bstrList* parts = NULL;
    struct bstrList* strlist = NULL;

    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();

    /* Without room in the output there is nothing that can be written safely. */
    if (cpulist == NULL || length <= 0)
    {
        return 0;
    }
    if (bstrchrp(bcpustr, 'L', 0) != 0)
    {
        fprintf(stderr, "Not a valid CPU expression\n");
        return 0;
    }
    /* bsplit() allocates the list itself; a preceding bstrListCreate() would leak. */
    parts = bsplit(bcpustr, ':');
    if (parts == NULL || parts->qty != 3)
    {
        fprintf(stderr, "ERROR: Invalid expression, should look like L:<domain>:<indexlist> or be in a cpuset\n");
        goto logical_done;
    }
    for (int i=0; i<affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(parts->entry[1], affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(parts->entry[1]));
        goto logical_done;
    }
    nprocs = affinity->domains[domainidx].numberOfProcessors;
    if (nprocs <= 0)
    {
        /* Empty domain: no index can be valid. */
        goto logical_done;
    }
    inlist = malloc((size_t)nprocs * sizeof(int));
    if (inlist == NULL)
    {
        status = -ENOMEM;
        goto logical_done;
    }
    /* NOTE(review): the return value of cpulist_sort() was never checked;
     * its contract is not visible here, so it is still ignored. */
    cpulist_sort(affinity->domains[domainidx].processorList, inlist, nprocs);

    strlist = bsplit(parts->entry[2], ',');
    if (strlist == NULL)
    {
        goto logical_done;
    }
    for (int i=0; i< strlist->qty; i++)
    {
        int first;
        int last;
        if (bstrchrp(strlist->entry[i], '-', 0) != BSTR_ERR)
        {
            struct bstrList* indexlist = bsplit(strlist->entry[i], '-');
            if (indexlist == NULL || indexlist->qty < 2)
            {
                fprintf(stderr, "Invalid index range %s\n", bdata(strlist->entry[i]));
                if (indexlist != NULL)
                {
                    bstrListDestroy(indexlist);
                }
                continue;
            }
            first = atoi(bdata(indexlist->entry[0]));
            last = atoi(bdata(indexlist->entry[1]));
            bstrListDestroy(indexlist);
        }
        else
        {
            /* A single index is handled as the range idx-idx. */
            first = last = atoi(bdata(strlist->entry[i]));
        }

        /* Walk the range in the direction given by the user; the loop exits
         * after visiting 'last', so idx never steps past the range end. */
        int step = (first <= last) ? 1 : -1;
        for (int idx = first; ; idx += step)
        {
            if (idx >= 0 && idx < nprocs)
            {
                cpulist[insert] = inlist[idx];
                insert++;
                if (insert == length)
                {
                    goto logical_done;
                }
            }
            else
            {
                fprintf(stderr, "Logical CPU index %d not in domain %s\n", idx, bdata(affinity->domains[domainidx].tag));
            }
            if (idx == last)
            {
                break;
            }
        }
    }
logical_done:
    free(inlist);
    if (strlist != NULL)
    {
        bstrListDestroy(strlist);
    }
    if (parts != NULL)
    {
        bstrListDestroy(parts);
    }
    return (status < 0) ? status : insert;
}
Esempio n. 15
0
/*
 * Translate an expression of the form E:<domain>:<count> or
 * E:<domain>:<count>:<chunk>:<stride> into CPU IDs.
 *
 * Starting at offset 0 of the domain's processor list, up to <chunk>
 * consecutive entries are taken, then the offset advances by <stride>
 * (restarting at 0 once it runs past the end of the list) until <count>
 * CPUs were selected. The three-field form uses chunk = stride = 1.
 *
 * bcpustr  expression to parse; must start with 'E'
 * cpulist  output array receiving the selected CPU IDs
 * length   capacity of cpulist; selection stops as soon as it is full
 *
 * Returns the number of entries written to cpulist (at most
 * min(count, length)), or 0 for an invalid expression, unknown domain or
 * unusable output buffer.
 */
static int cpustr_to_cpulist_expression(bstring bcpustr, int* cpulist, int length)
{
    int domainidx = -1;
    int count = 0;
    int stride = 0;
    int chunk = 0;
    int offset = 0;
    int insert = 0;
    int nprocs = 0;
    struct bstrList* strlist = NULL;

    topology_init();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();

    /* Without room in the output there is nothing that can be written safely. */
    if (cpulist == NULL || length <= 0)
    {
        return 0;
    }
    if (bstrchrp(bcpustr, 'E', 0) != 0)
    {
        fprintf(stderr, "Not a valid CPU expression\n");
        return 0;
    }
    /* bsplit() allocates the list itself; a preceding bstrListCreate() would leak. */
    strlist = bsplit(bcpustr, ':');
    if (strlist == NULL)
    {
        fprintf(stderr, "Not a valid CPU expression\n");
        return 0;
    }
    if (strlist->qty == 3)
    {
        count = atoi(bdata(strlist->entry[2]));
        stride = 1;
        chunk = 1;
    }
    else if (strlist->qty == 5)
    {
        count = atoi(bdata(strlist->entry[2]));
        chunk = atoi(bdata(strlist->entry[3]));
        stride = atoi(bdata(strlist->entry[4]));
    }
    else
    {
        /* Any other field count previously left the domain name uninitialized. */
        fprintf(stderr, "ERROR: Invalid expression, should look like E:<domain>:<count> or E:<domain>:<count>:<chunk>:<stride>\n");
        goto expression_done;
    }
    if (stride < 0)
    {
        /* A negative stride would move the offset in front of the list. */
        fprintf(stderr, "ERROR: Invalid expression, stride must not be negative\n");
        goto expression_done;
    }
    for (int i=0; i<affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(strlist->entry[1], affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(strlist->entry[1]));
        goto expression_done;
    }
    nprocs = affinity->domains[domainidx].numberOfProcessors;

    for (int i=0; i<count; i++)
    {
        for (int j=0; j<chunk && offset+j<nprocs; j++)
        {
            cpulist[insert] = affinity->domains[domainidx].processorList[offset + j];
            insert++;
            /* Stop inside the chunk so that never more than count CPUs are returned. */
            if (insert == length || insert == count)
            {
                goto expression_done;
            }
        }
        /* Advance by stride and wrap to the list start; the comparison is
         * written so that offset + stride cannot overflow. */
        if (stride >= nprocs - offset)
        {
            offset = 0;
        }
        else
        {
            offset += stride;
        }
    }
expression_done:
    bstrListDestroy(strlist);
    return insert;
}
Esempio n. 16
0
/*
 * Sanity test for the topology module.
 *
 * Initializes the topology, checks that the CPU info and CPU topology
 * structures are populated with plausible values (non-zero family/model,
 * non-NULL strings and tables, unique APIC IDs, thread/package IDs and cache
 * levels within the reported limits), stores cpuinfo->isIntel in the global
 * isIntel and finalizes the topology again.
 *
 * Returns 1 if all checks pass, 0 otherwise. topology_finalize() is called
 * on both paths.
 */
int test_topologyinit()
{
    int i, j;
    int passed = 0;
    CpuInfo_t cpuinfo = NULL;
    CpuTopology_t cputopo = NULL;

    if (topology_init() != 0)
        goto done;

    cpuinfo = get_cpuInfo();
    if (cpuinfo == NULL)
        goto done;
    if (cpuinfo->family == 0)
        goto done;
    if (cpuinfo->model == 0)
        goto done;
    if (cpuinfo->osname == NULL)
        goto done;
    if (cpuinfo->name == NULL)
        goto done;
    if (cpuinfo->features == NULL)
        goto done;

    cputopo = get_cpuTopology();
    /* Same guard as for cpuinfo: do not dereference a missing topology. */
    if (cputopo == NULL)
        goto done;
    if (cputopo->threadPool == NULL)
        goto done;
    if (cputopo->cacheLevels == NULL)
        goto done;
    if (cputopo->numHWThreads == 0)
        goto done;
    if (cputopo->activeHWThreads == 0)
        goto done;
    if (cputopo->numSockets == 0)
        goto done;
    if (cputopo->numCoresPerSocket < 1)
        goto done;
    if (cputopo->numThreadsPerCore < 1)
        goto done;

    for (i = 0; i < cputopo->numHWThreads; i++)
    {
        /* Every hardware thread must have its own APIC ID. */
        for (j = 0; j < cputopo->numHWThreads; j++)
        {
            if ((i != j) && (cputopo->threadPool[i].apicId == cputopo->threadPool[j].apicId))
                goto done;
        }
        if (cputopo->threadPool[i].threadId >= cputopo->numThreadsPerCore)
            goto done;
        if (cputopo->threadPool[i].packageId >= cputopo->numSockets)
            goto done;
    }

    for (i = 0; i < cputopo->numCacheLevels; i++)
    {
        if (cputopo->cacheLevels[i].level > cputopo->numCacheLevels)
            goto done;
    }

    isIntel = cpuinfo->isIntel;
    passed = 1;
done:
    topology_finalize();
    return passed;
}
Esempio n. 17
0
/*
 * Print the result of every event in the comma-separated list 'events' for
 * each of the numCPUs entries of 'cpus'. The list is scanned without being
 * modified; empty fields are skipped, as strtok() would do.
 */
static void stream_print_results(int gid, const char* events, const int* cpus, int numCPUs)
{
    int eventIdx = 0;
    const char* tok = events;

    for (;;)
    {
        tok += strspn(tok, ",");
        if (*tok == '\0')
        {
            break;
        }
        size_t toklen = strcspn(tok, ",");
        for (int i = 0; i < numCPUs; i++)
        {
            double result = perfmon_getResult(gid, eventIdx, cpus[i]);
            printf("Measurement result for event set %.*s at CPU %d: %f\n", (int)toklen, tok, cpus[i], result);
        }
        tok += toklen;
        eventIdx++;
    }
}

/*
 * Report a failed perfmon start/stop call. 'action' is "start" or "stop".
 * Returns err unchanged so the call can be used directly in a condition.
 */
static int stream_check_counters(int err, const char* action, int gid)
{
    if (err < 0)
    {
        printf("Failed to %s counters for group %d for thread %d\n", action, gid, (-1*err)-1);
    }
    return err;
}

/*
 * Benchmark driver: <cpustr> <events>.
 *
 * Resolves <cpustr> to a CPU list, sets up the event string <events> in the
 * perfmon module and runs four kernels (copy, scale, stream, triad) ITER
 * times each over vectors of SIZE elements. After each kernel the averaged
 * time per iteration and the counter results per event and CPU are printed.
 *
 * Returns 0 on success and 1 on any usage or initialization/measurement
 * error. Modules that were initialized are finalized on every path after
 * topology_init() succeeded.
 */
int main(int argn, char** argc)
{
    int err;
    int status = 1;
    int numCPUs = 0;
    int gid;
    int* cpus = NULL;
    DATATYPE *a,*b,*c,*d;
    TimeData timer;
    double triad_time, copy_time, scale_time, stream_time;
    char estr[1024];
    double scalar = 3.0;

    if (argn != 3)
    {
        printf("Usage: %s <cpustr> <events>\n", argc[0]);
        return 1;
    }

    /* The event string is user input; refuse it instead of overflowing estr. */
    if (strlen(argc[2]) >= sizeof(estr))
    {
        printf("Event string too long, at most %d characters are supported\n", (int)sizeof(estr) - 1);
        return 1;
    }
    strcpy(estr, argc[2]);

    allocate_vector(&a, SIZE);
    allocate_vector(&b, SIZE);
    allocate_vector(&c, SIZE);
    allocate_vector(&d, SIZE);

    err = topology_init();
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's topology module\n");
        return 1;
    }
    CpuTopology_t topo = get_cpuTopology();
    affinity_init();
    cpus = malloc(topo->numHWThreads * sizeof(int));
    if (!cpus)
    {
        goto out_affinity;
    }
    numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads);
    if (numCPUs <= 0)
    {
        /* Zero or negative thread counts are not usable for OpenMP or perfmon. */
        printf("Failed to resolve CPU string %s\n", argc[1]);
        goto out_affinity;
    }
    omp_set_num_threads(numCPUs);
    err = perfmon_init(numCPUs, cpus);
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's performance monitoring module\n");
        goto out_affinity;
    }
    gid = perfmon_addEventSet(estr);
    if (gid < 0)
    {
        printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr);
        goto out_perfmon;
    }

    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        goto out_perfmon;
    }

#ifdef _OPENMP
    printf(HLINE);
#pragma omp parallel
    {
#pragma omp master
    {
        printf ("Number of Threads requested = %i\n",omp_get_num_threads());
    }
    likwid_pinThread(cpus[omp_get_thread_num()]);
    printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
    }
#endif

#pragma omp parallel for
    for (int j=0; j<SIZE; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
        d[j] = 1.0;
    }

    /* ---- copy: c = a ---- */
    err = perfmon_startCounters();
    if (stream_check_counters(err, "start", gid) < 0)
    {
        goto out_perfmon;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("copy");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                c[j] = a[j];
            }
            LIKWID_MARKER_STOP("copy");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    copy_time = time_print(&timer)/(double)ITER;
    if (stream_check_counters(err, "stop", gid) < 0)
    {
        goto out_perfmon;
    }

    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        copy_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));
    stream_print_results(gid, argc[2], cpus, numCPUs);

    /* ---- scale: b = scalar * c ---- */
    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        goto out_perfmon;
    }
    err = perfmon_startCounters();
    if (stream_check_counters(err, "start", gid) < 0)
    {
        goto out_perfmon;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("scale");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                b[j] = scalar*c[j];
            }
            LIKWID_MARKER_STOP("scale");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    scale_time = time_print(&timer)/(double)ITER;
    if (stream_check_counters(err, "stop", gid) < 0)
    {
        goto out_perfmon;
    }

    /* Report the time of this kernel, not the one measured for copy. */
    printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        scale_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/scale_time));
    stream_print_results(gid, argc[2], cpus, numCPUs);

    /* ---- stream: c = a + b ---- */
    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        goto out_perfmon;
    }
    err = perfmon_startCounters();
    if (stream_check_counters(err, "start", gid) < 0)
    {
        goto out_perfmon;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("stream");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                c[j] = a[j] + b[j];
            }
            LIKWID_MARKER_STOP("stream");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    stream_time = time_print(&timer)/(double)ITER;
    if (stream_check_counters(err, "stop", gid) < 0)
    {
        goto out_perfmon;
    }

    /* NOTE(review): this kernel touches three vectors but the volume is
     * computed from two (and triad below uses four); confirm the intended
     * accounting before changing the factors. */
    printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        stream_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/stream_time));
    stream_print_results(gid, argc[2], cpus, numCPUs);

    /* ---- triad: a = b + c * scalar ---- */
    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        goto out_perfmon;
    }
    err = perfmon_startCounters();
    if (stream_check_counters(err, "start", gid) < 0)
    {
        goto out_perfmon;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("triad");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                a[j] = b[j] +  c[j] * scalar;
            }
            LIKWID_MARKER_STOP("triad");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    triad_time = time_print(&timer)/(double)ITER;
    if (stream_check_counters(err, "stop", gid) < 0)
    {
        goto out_perfmon;
    }

    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(4*SIZE*sizeof(DATATYPE)),
                        triad_time,
                        1E-6*((4*SIZE*sizeof(DATATYPE))/triad_time));
    stream_print_results(gid, argc[2], cpus, numCPUs);

    status = 0;

    /* Tear down in reverse order of initialization. */
out_perfmon:
    perfmon_finalize();
out_affinity:
    free(cpus);
    affinity_finalize();
    topology_finalize();
    return status;
}
Esempio n. 18
0
/*
 * Minimal perfmon API example.
 *
 * Initializes the topology module, builds a CPU list with one entry per
 * hardware thread, measures a fixed event set while sleeping for two seconds
 * and prints the result of every event for every CPU in the list.
 *
 * Returns 0 on success, 1 if any initialization or measurement step fails.
 * The CPU list is released and every module that was initialized is
 * finalized on all paths after topology_init() succeeded.
 */
int main(int argc, char* argv[])
{
    int i, j;
    int err;
    int status = 1;
    int* cpus = NULL;
    int gid;
    double result = 0.0;
    char* ptr;
    char estr[] = "INSTR_RETIRED_ANY:FIXC0,CPU_CLK_UNHALTED_CORE:FIXC1,CPU_CLK_UNHALTED_REF:FIXC2,TEMP_CORE:TMP0";

    // Load the topology module and print some values.
    err = topology_init();
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's topology module\n");
        return 1;
    }
    // CpuInfo_t contains global information like name, CPU family, ...
    CpuInfo_t info = get_cpuInfo();
    // CpuTopology_t contains information about the topology of the CPUs.
    CpuTopology_t topo = get_cpuTopology();
    // Create affinity domains. Commonly only needed when reading Uncore counters
    //affinity_init();

    printf("Likwid example on a %s with %d CPUs\n", info->name, topo->numHWThreads);

    cpus = malloc(topo->numHWThreads * sizeof(int));
    if (!cpus)
    {
        goto out_topology;
    }

    // NOTE(review): the APIC ID of each hardware thread is used as CPU ID
    // here; confirm that this matches the IDs perfmon_init() expects.
    for (i=0;i<topo->numHWThreads;i++)
    {
        cpus[i] = topo->threadPool[i].apicId;
    }

    // Must be called before perfmon_init() but only if you want to use another
    // access mode as the pre-configured one. For direct access (0) you have to
    // be root.
    //accessClient_setaccessmode(0);

    // Initialize the perfmon module.
    err = perfmon_init(topo->numHWThreads, cpus);
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's performance monitoring module\n");
        goto out_topology;
    }

    // Add eventset string to the perfmon module.
    gid = perfmon_addEventSet(estr);
    if (gid < 0)
    {
        printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr);
        goto out_perfmon;
    }

    // Setup the eventset identified by group ID (gid).
    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        goto out_perfmon;
    }
    // Start all counters in the previously set up event set.
    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        goto out_perfmon;
    }
    // Perform something
    sleep(2);
    // Stop all counters in the previously started event set.
    err = perfmon_stopCounters();
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        goto out_perfmon;
    }

    // Print the result of every thread/CPU for all events in estr.
    // strtok() cuts estr into its events in place; estr is not used afterwards.
    ptr = strtok(estr,",");
    j = 0;
    while (ptr != NULL)
    {
        for (i = 0;i < topo->numHWThreads; i++)
        {
            result = perfmon_getResult(gid, j, cpus[i]);
            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
        }
        ptr = strtok(NULL,",");
        j++;
    }
    status = 0;

out_perfmon:
    // Uninitialize the perfmon module.
    perfmon_finalize();
out_topology:
    free(cpus);
    // Uninitialize the topology module.
    topology_finalize();
    return status;
}