C++ (Cpp) numa_initの例

プログラミング言語: C++ (Cpp)

メソッド/関数: numa_init

hotexamples.comのコード掲載数: 14

C++ (Cpp) numa_init - 14件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC++ (Cpp)のnuma_initの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: test-likwidAPI.c プロジェクト: ThreeGe/likwid

int test_numainit()
{
    int i = 0;
    topology_init();
    numa_init();
    NumaTopology_t numainfo = get_numaTopology();
    if (numainfo == NULL)
        goto fail;
    if (numainfo->numberOfNodes <= 0)
        goto fail;
    if (likwid_getNumberOfNodes() <= 0)
        goto fail;
    for (i = 0; i < likwid_getNumberOfNodes(); i++)
    {
        if (numainfo->nodes[i].totalMemory == 0)
            goto fail;
        if (numainfo->nodes[i].freeMemory == 0)
            goto fail;
        if (numainfo->nodes[i].numberOfProcessors == 0)
            goto fail;
        if (numainfo->nodes[i].numberOfDistances == 0)
            goto fail;
        if (numainfo->nodes[i].numberOfDistances != likwid_getNumberOfNodes())
            goto fail;
    }
    numa_finalize();
    topology_finalize();
    return 1;
fail:
    numa_finalize();
    topology_finalize();
    return 0;
}

コード例 #2

ファイルを表示

ファイル: luawid.c プロジェクト: WaldonChen/likwid

static int lua_likwid_init(lua_State* L)
{
    int ret;
    int nrThreads = luaL_checknumber(L,1);
    luaL_argcheck(L, nrThreads > 0, 1, "CPU count must be greater than 0");
    int cpus[nrThreads];
    if (!lua_istable(L, -1)) {
      lua_pushstring(L,"No table given as second argument");
      lua_error(L);
    }
    for (ret = 1; ret<=nrThreads; ret++)
    {
        lua_rawgeti(L,-1,ret);
        cpus[ret-1] = lua_tounsigned(L,-1);
        lua_pop(L,1);
    }
    if (topology_isInitialized == 0)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if ((topology_isInitialized) && (cpuinfo == NULL))
    {
        cpuinfo = get_cpuInfo();
    }
    if ((topology_isInitialized) && (cputopo == NULL))
    {
        cputopo = get_cpuTopology();
    }
    if (numa_isInitialized == 0)
    {
        numa_init();
        numa_isInitialized = 1;
        numainfo = get_numaTopology();
    }
    if ((numa_isInitialized) && (numainfo == NULL))
    {
        numainfo = get_numaTopology();
    }
    if (perfmon_isInitialized == 0)
    {
        ret = perfmon_init(nrThreads, &(cpus[0]));
        if (ret != 0)
        {
            lua_pushstring(L,"Cannot initialize likwid perfmon");
            lua_error(L);
            return 1;
        }
        perfmon_isInitialized = 1;
        timer_isInitialized = 1;
        lua_pushinteger(L,ret);
    }
    return 1;
}

コード例 #3

ファイルを表示

ファイル: test-likwidAPI.c プロジェクト: ThreeGe/likwid

int test_affinityinit()
{
    int i = 0;
    topology_init();
    CpuTopology_t cputopo = get_cpuTopology();
    numa_init();
    affinity_init();
    AffinityDomains_t doms = get_affinityDomains();
    if (doms == NULL)
        goto fail;
    if (doms->numberOfSocketDomains != cputopo->numSockets)
        goto fail;
    if (doms->numberOfNumaDomains == 0)
        goto fail;
    if (doms->numberOfProcessorsPerSocket == 0)
        goto fail;
    if (doms->numberOfAffinityDomains == 0)
        goto fail;
    if (doms->numberOfCacheDomains == 0)
        goto fail;
    if (doms->numberOfCoresPerCache == 0)
        goto fail;
    if (doms->numberOfProcessorsPerCache == 0)
        goto fail;
    if (doms->numberOfProcessorsPerCache < doms->numberOfCoresPerCache)
        goto fail;
    if (doms->domains == NULL)
        goto fail;
    for (i = 0; i < doms->numberOfAffinityDomains; i++)
    {
        if (doms->domains[i].numberOfProcessors == 0)
            goto fail;
        if (doms->domains[i].numberOfCores == 0)
            goto fail;
        if (doms->domains[i].numberOfProcessors < doms->domains[i].numberOfCores)
            goto fail;
        if (doms->domains[i].processorList == NULL)
            goto fail;
    }
    affinity_finalize();
    topology_finalize();
    return 1;
fail:
    affinity_finalize();
    topology_finalize();
    return 0;
}

コード例 #4

ファイルを表示

ファイル: libperfctr.c プロジェクト: haibo031031/likwid

void likwid_markerInit(void)
{
    int i;
    int verbosity;
    bstring bThreadStr;
    bstring bEventStr;
    struct bstrList* threadTokens;
    struct bstrList* eventStrings;
    char* modeStr = getenv("LIKWID_MODE");
    char* eventStr = getenv("LIKWID_EVENTS");
    char* cThreadStr = getenv("LIKWID_THREADS");
    char* filepath = getenv("LIKWID_FILEPATH");
    /* Dirty hack to avoid nonnull warnings */
    int (*ownatoi)(const char*);
    ownatoi = &atoi;

    if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL))
    {
        likwid_init = 1;
    }
    else if (likwid_init == 0)
    {
        fprintf(stderr, "Cannot initalize LIKWID marker API, environment variables are not set\n");
        fprintf(stderr, "You have to set the -m commandline switch for likwid-perfctr\n");
        return;
    }
    else
    {
        return;
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    topology_init();
    numa_init();
    affinity_init();
    hashTable_init();

    for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT;

    HPMmode(atoi(modeStr));

    if (getenv("LIKWID_DEBUG") != NULL)
    {
        perfmon_verbosity = atoi(getenv("LIKWID_DEBUG"));
        verbosity = perfmon_verbosity;
    }

    bThreadStr = bfromcstr(cThreadStr);
    threadTokens = bstrListCreate();
    threadTokens = bsplit(bThreadStr,',');
    num_cpus = threadTokens->qty;
    for (i=0; i<num_cpus; i++)
    {
        threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i]));
    }
    bdestroy(bThreadStr);
    bstrListDestroy(threadTokens);
    
    if (getenv("LIKWID_PIN") != NULL)
    {
        likwid_pinThread(threads2Cpu[0]);
        if (getenv("OMP_NUM_THREADS") != NULL)
        {
            if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
        if (getenv("CILK_NWORKERS") != NULL)
        {
            if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
    }

    i = perfmon_init(num_cpus, threads2Cpu);
    if (i<0)
    {
        fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n");
        return;
    }

    bEventStr = bfromcstr(eventStr);
    eventStrings = bstrListCreate();
    eventStrings = bsplit(bEventStr,'|');
    numberOfGroups = eventStrings->qty;
    groups = malloc(numberOfGroups * sizeof(int));
    if (!groups)
    {
        fprintf(stderr,"Cannot allocate space for group handling.\n");
        bstrListDestroy(eventStrings);
        exit(EXIT_FAILURE);
    }
    for (i=0; i<eventStrings->qty; i++)
    {
        groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i]));
    }
    bstrListDestroy(eventStrings);
    bdestroy(bEventStr);

    for (i=0; i<num_cpus; i++)
    {
        hashTable_initThread(threads2Cpu[i]);
        for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++)
        {
            groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE;
        }
    }

    groupSet->activeGroup = 0;
}

コード例 #5

ファイルを表示

ファイル: luawid.c プロジェクト: WaldonChen/likwid

static int lua_likwid_getAffinityInfo(lua_State* L)
{
    int i,j;
    
    if (topology_isInitialized == 0)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if ((topology_isInitialized) && (cpuinfo == NULL))
    {
        cpuinfo = get_cpuInfo();
    }
    if ((topology_isInitialized) && (cputopo == NULL))
    {
        cputopo = get_cpuTopology();
    }
    if (numa_isInitialized == 0)
    {
        if (numa_init() == 0)
        {
            numa_isInitialized = 1;
            numainfo = get_numaTopology();
        }
    }
    if ((numa_isInitialized) && (numainfo == NULL))
    {
        numainfo = get_numaTopology();
    }
    if (affinity_isInitialized == 0)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    if ((affinity_isInitialized) && (affinity == NULL))
    {
        affinity = get_affinityDomains();
    }

    if (!affinity)
    {
        lua_pushstring(L,"Cannot initialize affinity groups");
        lua_error(L);
    }
    lua_newtable(L);
    lua_pushstring(L,"numberOfAffinityDomains");
    lua_pushunsigned(L,affinity->numberOfAffinityDomains);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfSocketDomains");
    lua_pushunsigned(L,affinity->numberOfSocketDomains);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfNumaDomains");
    lua_pushunsigned(L,affinity->numberOfNumaDomains);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfProcessorsPerSocket");
    lua_pushunsigned(L,affinity->numberOfProcessorsPerSocket);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfCacheDomains");
    lua_pushunsigned(L,affinity->numberOfCacheDomains);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfCoresPerCache");
    lua_pushunsigned(L,affinity->numberOfCoresPerCache);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfProcessorsPerCache");
    lua_pushunsigned(L,affinity->numberOfProcessorsPerCache);
    lua_settable(L,-3);
    lua_pushstring(L,"domains");
    lua_newtable(L);
    for(i=0;i<affinity->numberOfAffinityDomains;i++)
    {
        lua_pushunsigned(L, i+1);
        lua_newtable(L);
        lua_pushstring(L,"tag");
        lua_pushstring(L,bdata(affinity->domains[i].tag));
        lua_settable(L,-3);
        lua_pushstring(L,"numberOfProcessors");
        lua_pushunsigned(L,affinity->domains[i].numberOfProcessors);
        lua_settable(L,-3);
        lua_pushstring(L,"numberOfCores");
        lua_pushunsigned(L,affinity->domains[i].numberOfCores);
        lua_settable(L,-3);
        lua_pushstring(L,"processorList");
        lua_newtable(L);
        for(j=0;j<affinity->domains[i].numberOfProcessors;j++)
        {
            lua_pushunsigned(L,j+1);
            lua_pushunsigned(L,affinity->domains[i].processorList[j]);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);
        lua_settable(L,-3);
    }
    lua_settable(L,-3);
    return 1;
}

コード例 #6

ファイルを表示

ファイル: luawid.c プロジェクト: WaldonChen/likwid

static int lua_likwid_getNumaInfo(lua_State* L)
{
    uint32_t i,j;
    if (topology_isInitialized == 0)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if ((topology_isInitialized) && (cpuinfo == NULL))
    {
        cpuinfo = get_cpuInfo();
    }
    if ((topology_isInitialized) && (cputopo == NULL))
    {
        cputopo = get_cpuTopology();
    }
    if (numa_isInitialized == 0)
    {
        if (numa_init() == 0)
        {
            numa_isInitialized = 1;
            numainfo = get_numaTopology();
        }
        else
        {
            lua_newtable(L);
            lua_pushstring(L,"numberOfNodes");
            lua_pushunsigned(L,0);
            lua_settable(L,-3);
            lua_pushstring(L,"nodes");
            lua_newtable(L);
            lua_settable(L,-3);
            return 1;
        }
    }
    if ((numa_isInitialized) && (numainfo == NULL))
    {
        numainfo = get_numaTopology();
    }
    if (affinity_isInitialized == 0)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    if ((affinity_isInitialized) && (affinity == NULL))
    {
        affinity = get_affinityDomains();
    }
    lua_newtable(L);
    lua_pushstring(L,"numberOfNodes");
    lua_pushunsigned(L,numainfo->numberOfNodes);
    lua_settable(L,-3);

    lua_pushstring(L,"nodes");
    lua_newtable(L);
    for(i=0;i<numainfo->numberOfNodes;i++)
    {
        lua_pushinteger(L, i+1);
        lua_newtable(L);
        
        lua_pushstring(L,"id");
        lua_pushunsigned(L,numainfo->nodes[i].id);
        lua_settable(L,-3);
        lua_pushstring(L,"totalMemory");
        lua_pushunsigned(L,numainfo->nodes[i].totalMemory);
        lua_settable(L,-3);
        lua_pushstring(L,"freeMemory");
        lua_pushunsigned(L,numainfo->nodes[i].freeMemory);
        lua_settable(L,-3);
        lua_pushstring(L,"numberOfProcessors");
        lua_pushunsigned(L,numainfo->nodes[i].numberOfProcessors);
        lua_settable(L,-3);
        lua_pushstring(L,"numberOfDistances");
        lua_pushunsigned(L,numainfo->nodes[i].numberOfDistances);
        lua_settable(L,-3);
        
        lua_pushstring(L,"processors");
        lua_newtable(L);
        for(j=0;j<numainfo->nodes[i].numberOfProcessors;j++)
        {
            lua_pushunsigned(L,j+1);
            lua_pushunsigned(L,numainfo->nodes[i].processors[j]);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);
        
        /*lua_pushstring(L,"processorsCompact");
        lua_newtable(L);
        for(j=0;j<numa->nodes[i].numberOfProcessors;j++)
        {
            lua_pushunsigned(L,j);
            lua_pushunsigned(L,numa->nodes[i].processorsCompact[j]);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);*/
        
        lua_pushstring(L,"distances");
        lua_newtable(L);
        for(j=0;j<numainfo->nodes[i].numberOfDistances;j++)
        {
            lua_pushinteger(L,j+1);
            lua_newtable(L);
            lua_pushinteger(L,j);
            lua_pushunsigned(L,numainfo->nodes[i].distances[j]);
            lua_settable(L,-3);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);
        
        lua_settable(L,-3);
    }
    lua_settable(L,-3);
    return 1;
}

コード例 #7

ファイルを表示

ファイル: luawid.c プロジェクト: WaldonChen/likwid

static int lua_likwid_getCpuTopology(lua_State* L)
{
    int i;
    TreeNode* socketNode;
    int socketCount = 0;
    TreeNode* coreNode;
    int coreCount = 0;
    TreeNode* threadNode;
    int threadCount = 0;
    if (topology_isInitialized == 0)
    {
        topology_init();
        topology_isInitialized = 1;
        cputopo = get_cpuTopology();
    }
    if ((topology_isInitialized) && (cputopo == NULL))
    {
        cputopo = get_cpuTopology();
    }
    if (numa_isInitialized == 0)
    {
        if (numa_init() == 0)
        {
            numa_isInitialized = 1;
            numainfo = get_numaTopology();
        }
    }
    if ((numa_isInitialized) && (numainfo == NULL))
    {
        numainfo = get_numaTopology();
    }

    lua_newtable(L);

    lua_pushstring(L,"numHWThreads");
    lua_pushunsigned(L,cputopo->numHWThreads);
    lua_settable(L,-3);

    lua_pushstring(L,"activeHWThreads");
    lua_pushunsigned(L,cputopo->activeHWThreads);
    lua_settable(L,-3);

    lua_pushstring(L,"numSockets");
    lua_pushunsigned(L,cputopo->numSockets);
    lua_settable(L,-3);

    lua_pushstring(L,"numCoresPerSocket");
    lua_pushunsigned(L,cputopo->numCoresPerSocket);
    lua_settable(L,-3);

    lua_pushstring(L,"numThreadsPerCore");
    lua_pushunsigned(L,cputopo->numThreadsPerCore);
    lua_settable(L,-3);

    lua_pushstring(L,"numCacheLevels");
    lua_pushinteger(L,cputopo->numCacheLevels);
    lua_settable(L,-3);

    lua_pushstring(L,"threadPool");
    lua_newtable(L);
    for(i=0;i<cputopo->numHWThreads;i++)
    {
        lua_pushnumber(L,i);
        lua_newtable(L);
        lua_pushstring(L,"threadId");
        lua_pushunsigned(L,cputopo->threadPool[i].threadId);
        lua_settable(L,-3);
        lua_pushstring(L,"coreId");
        lua_pushunsigned(L,cputopo->threadPool[i].coreId);
        lua_settable(L,-3);
        lua_pushstring(L,"packageId");
        lua_pushunsigned(L,cputopo->threadPool[i].packageId);
        lua_settable(L,-3);
        lua_pushstring(L,"apicId");
        lua_pushunsigned(L,cputopo->threadPool[i].apicId);
        lua_settable(L,-3);
        lua_pushstring(L,"inCpuSet");
        lua_pushunsigned(L,cputopo->threadPool[i].inCpuSet);
        lua_settable(L,-3);
        lua_settable(L,-3);
    }
    lua_settable(L,-3);

    lua_pushstring(L,"cacheLevels");
    lua_newtable(L);
    for(i=0;i<cputopo->numCacheLevels;i++)
    {
        lua_pushnumber(L,i+1);
        lua_newtable(L);

        lua_pushstring(L,"level");
        lua_pushunsigned(L,cputopo->cacheLevels[i].level);
        lua_settable(L,-3);

        lua_pushstring(L,"associativity");
        lua_pushunsigned(L,cputopo->cacheLevels[i].associativity);
        lua_settable(L,-3);

        lua_pushstring(L,"sets");
        lua_pushunsigned(L,cputopo->cacheLevels[i].sets);
        lua_settable(L,-3);

        lua_pushstring(L,"lineSize");
        lua_pushunsigned(L,cputopo->cacheLevels[i].lineSize);
        lua_settable(L,-3);

        lua_pushstring(L,"size");
        lua_pushunsigned(L,cputopo->cacheLevels[i].size);
        lua_settable(L,-3);

        lua_pushstring(L,"threads");
        lua_pushunsigned(L,cputopo->cacheLevels[i].threads);
        lua_settable(L,-3);

        lua_pushstring(L,"inclusive");
        lua_pushunsigned(L,cputopo->cacheLevels[i].inclusive);
        lua_settable(L,-3);

        lua_pushstring(L,"type");
        switch (cputopo->cacheLevels[i].type)
        {
            case DATACACHE:
                lua_pushstring(L,"DATACACHE");
                break;
            case INSTRUCTIONCACHE:
                lua_pushstring(L,"INSTRUCTIONCACHE");
                break;
            case UNIFIEDCACHE:
                lua_pushstring(L,"UNIFIEDCACHE");
                break;
            case ITLB:
                lua_pushstring(L,"ITLB");
                break;
            case DTLB:
                lua_pushstring(L,"DTLB");
                break;
            case NOCACHE:
            default:
                lua_pushstring(L,"NOCACHE");
                break;
        }
        lua_settable(L,-3);
        lua_settable(L,-3);
    }
    lua_settable(L,-3);

    lua_pushstring(L,"topologyTree");
    lua_newtable(L);

    socketNode = tree_getChildNode(cputopo->topologyTree);
    while (socketNode != NULL)
    {
        lua_pushinteger(L, socketCount);
        lua_newtable(L);
        lua_pushstring(L, "ID");
        lua_pushunsigned(L,socketNode->id);
        lua_settable(L, -3);
        lua_pushstring(L, "Childs");
        lua_newtable(L);
        coreCount = 0;
        coreNode = tree_getChildNode(socketNode);
        while (coreNode != NULL)
        {
            lua_pushinteger(L, coreCount);
            lua_newtable(L);
            lua_pushstring(L, "ID");
            lua_pushunsigned(L,coreNode->id);
            lua_settable(L,-3);
            lua_pushstring(L, "Childs");
            lua_newtable(L);
            threadNode = tree_getChildNode(coreNode);
            threadCount = 0;
            while (threadNode != NULL)
            {
                lua_pushunsigned(L,threadCount);
                lua_pushunsigned(L,threadNode->id);
                lua_settable(L,-3);
                threadNode = tree_getNextNode(threadNode);
                threadCount++;
            }
            lua_settable(L,-3);
            coreNode = tree_getNextNode(coreNode);
            coreCount++;
            lua_settable(L,-3);
        }
        lua_settable(L,-3);
        socketNode = tree_getNextNode(socketNode);
        socketCount++;
        lua_settable(L,-3);
    }
    lua_settable(L,-3);
    return 1;
}

コード例 #8

ファイルを表示

ファイル: libperfctr.c プロジェクト: RRZE-HPC/likwid

void
likwid_markerInit(void)
{
    int i;
    int verbosity;
    int setinit = 0;
    bstring bThreadStr;
    bstring bEventStr;
    struct bstrList* threadTokens;
    struct bstrList* eventStrings;
    char* modeStr = getenv("LIKWID_MODE");
    char* eventStr = getenv("LIKWID_EVENTS");
    char* cThreadStr = getenv("LIKWID_THREADS");
    char* filepath = getenv("LIKWID_FILEPATH");
    char* perfpid = getenv("LIKWID_PERF_EXECPID");
    char execpid[20];
    /* Dirty hack to avoid nonnull warnings */
    int (*ownatoi)(const char*);
    ownatoi = &atoi;

    if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL) && likwid_init == 0)
    {
        setinit = 1;
    }
    else if (likwid_init == 0)
    {
        fprintf(stderr, "Running without Marker API. Activate Marker API with -m on commandline.\n");
        return;
    }
    else
    {
        return;
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    topology_init();
    numa_init();
    affinity_init();
    hashTable_init();

//#ifndef LIKWID_USE_PERFEVENT
    HPMmode(atoi(modeStr));
//#endif
    if (getenv("LIKWID_DEBUG") != NULL)
    {
        perfmon_verbosity = atoi(getenv("LIKWID_DEBUG"));
        verbosity = perfmon_verbosity;
    }

    bThreadStr = bfromcstr(cThreadStr);
    threadTokens = bsplit(bThreadStr,',');
    num_cpus = threadTokens->qty;
    for (i=0; i<num_cpus; i++)
    {
        threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i]));
    }
    bdestroy(bThreadStr);
    bstrListDestroy(threadTokens);

    if (getenv("LIKWID_PIN") != NULL)
    {
        likwid_pinThread(threads2Cpu[0]);
        if (getenv("OMP_NUM_THREADS") != NULL)
        {
            if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
        if (getenv("CILK_NWORKERS") != NULL)
        {
            if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
    }
#ifdef LIKWID_USE_PERFEVENT
    if (perfpid != NULL)
    {
        snprintf(execpid, 19, "%d", getpid());
        setenv("LIKWID_PERF_PID", execpid, 1);
        char* perfflags = getenv("LIKWID_PERF_FLAGS");
        if (perfflags)
        {
            setenv("LIKWID_PERF_FLAGS", getenv("LIKWID_PERF_FLAGS"), 1);
        }
    }
#endif

    i = perfmon_init(num_cpus, threads2Cpu);
    if (i<0)
    {
        //fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n");
        return;
    }

    bEventStr = bfromcstr(eventStr);
    eventStrings = bsplit(bEventStr,'|');
    numberOfGroups = eventStrings->qty;
    groups = malloc(numberOfGroups * sizeof(int));
    if (!groups)
    {
        fprintf(stderr,"Cannot allocate space for group handling.\n");
        bstrListDestroy(eventStrings);
        exit(EXIT_FAILURE);
    }
    for (i=0; i<eventStrings->qty; i++)
    {
        groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i]));
    }
    bstrListDestroy(eventStrings);
    bdestroy(bEventStr);

    for (i=0; i<num_cpus; i++)
    {
        hashTable_initThread(threads2Cpu[i]);
        for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++)
        {
            groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE;
            groupSet->groups[groups[0]].state = STATE_START;
        }
    }
    if (setinit)
    {
        likwid_init = 1;
    }
    threads2Pthread[registered_cpus] = pthread_self();
    registered_cpus++;

    groupSet->activeGroup = 0;

    perfmon_setupCounters(groupSet->activeGroup);
    perfmon_startCounters();
}

コード例 #9

ファイルを表示

void likwid_markerInit(void)
{
    int cpuId = likwid_getProcessorId();
    char* modeStr = getenv("LIKWID_MODE");
    char* maskStr = getenv("LIKWID_MASK");

    if ((modeStr != NULL) && (maskStr != NULL))
    {
        likwid_init = 1;
    }
    else
    {
        return;
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    cpuid_init();
    numa_init();
    affinity_init();
    timer_init();
    hashTable_init();

    for(int i=0; i<MAX_NUM_THREADS; i++) thread_socketFD[i] = -1;
    for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT;

    accessClient_mode = atoi(modeStr);
    str2BitMask(maskStr, &counterMask);

    if (accessClient_mode != DAEMON_AM_DIRECT)
    {
        accessClient_init(&thread_socketFD[cpuId]);
    }

    msr_init(thread_socketFD[cpuId]);
    thermal_init(cpuId);

    switch ( cpuid_info.family )
    {
        case P6_FAMILY:

            switch ( cpuid_info.model )
            {
                case PENTIUM_M_BANIAS:

                case PENTIUM_M_DOTHAN:

                    perfmon_counter_map = pm_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_PM;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_PM;
                    break;

                case ATOM_45:

                case ATOM_32:

                case ATOM_22:

                case ATOM:

                    perfmon_counter_map = core2_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_CORE2;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_CORE2;
                    break;

                case CORE_DUO:
                    ERROR_PLAIN_PRINT(Unsupported Processor);
                    break;

                case XEON_MP:

                case CORE2_65:

                case CORE2_45:

                    perfmon_counter_map = core2_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_CORE2;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_CORE2;
                    break;

                case NEHALEM_EX:

                case WESTMERE_EX:

                    perfmon_counter_map = westmereEX_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_WESTMEREEX;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_WESTMEREEX;
                    perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_WESTMEREEX;
                    break;

                case NEHALEM_BLOOMFIELD:

                case NEHALEM_LYNNFIELD:

                case NEHALEM_WESTMERE_M:

                case NEHALEM_WESTMERE:

                    perfmon_counter_map = nehalem_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_NEHALEM;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_NEHALEM;
                    perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_NEHALEM;
                    break;

                case IVYBRIDGE:

                case IVYBRIDGE_EP:

                    {
                        int socket_fd = thread_socketFD[cpuId];
                        hasPCICounters = 1;
                        power_init(0); /* FIXME Static coreId is dangerous */
                        pci_init(socket_fd);
                        perfmon_counter_map = ivybridge_counter_map;
                        perfmon_numCounters = NUM_COUNTERS_IVYBRIDGE;
                        perfmon_numCountersCore = NUM_COUNTERS_CORE_IVYBRIDGE;
                        perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_IVYBRIDGE;
                    }
                    break;

                case HASWELL:

                case HASWELL_EX:

                case HASWELL_M1:

                case HASWELL_M2:

                    power_init(0); /* FIXME Static coreId is dangerous */

                    perfmon_counter_map = haswell_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_HASWELL;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_HASWELL;
                    break;

                case SANDYBRIDGE:

                case SANDYBRIDGE_EP:

                    {
                        int socket_fd = thread_socketFD[cpuId];
                        hasPCICounters = 1;
                        power_init(0); /* FIXME Static coreId is dangerous */
                        pci_init(socket_fd);
                        perfmon_counter_map = sandybridge_counter_map;
                        perfmon_numCounters = NUM_COUNTERS_SANDYBRIDGE;
                        perfmon_numCountersCore = NUM_COUNTERS_CORE_SANDYBRIDGE;
                        perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_SANDYBRIDGE;
                    }
                    break;

                default:
                    ERROR_PLAIN_PRINT(Unsupported Processor);
                    break;
            }
            break;

        case MIC_FAMILY:

            switch ( cpuid_info.model )
            {
                case XEON_PHI:

                    perfmon_counter_map = phi_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_PHI;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_PHI;
                    break;

                default:
                    ERROR_PLAIN_PRINT(Unsupported Processor);
                    break;
            }
            break;

        case K8_FAMILY:

            perfmon_counter_map = k10_counter_map;
            perfmon_numCounters = NUM_COUNTERS_K10;
            perfmon_numCountersCore = NUM_COUNTERS_CORE_K10;
            break;

        case K10_FAMILY:

            perfmon_counter_map = k10_counter_map;
            perfmon_numCounters = NUM_COUNTERS_K10;
            perfmon_numCountersCore = NUM_COUNTERS_CORE_K10;
            break;

        case K15_FAMILY:

            perfmon_counter_map = interlagos_counter_map;
            perfmon_numCounters = NUM_COUNTERS_INTERLAGOS;
            perfmon_numCountersCore = NUM_COUNTERS_CORE_INTERLAGOS;
            break;

        case K16_FAMILY:

            perfmon_counter_map = kabini_counter_map;
            perfmon_numCounters = NUM_COUNTERS_KABINI;
            perfmon_numCountersCore = NUM_COUNTERS_CORE_KABINI;
            break;

        default:
            ERROR_PLAIN_PRINT(Unsupported Processor);
            break;
    }
}

コード例 #10

ファイルを表示

ファイル: likwid-bench.c プロジェクト: schaars/kzimp

int main(int argc, char** argv)
{
    int iter = 100;
    uint32_t i;
    uint32_t j;
    int globalNumberOfThreads = 0;
    int optPrintDomains = 0;
    int c;
    ThreadUserData myData;
    bstring testcase = bfromcstr("none");
    uint32_t numberOfWorkgroups = 0;
    int tmp = 0;
    double time;
    const TestCase* test = NULL;
    Workgroup* currentWorkgroup = NULL;
    Workgroup* groups = NULL;

    cpuid_init();
    numa_init();
    affinity_init();

    /* Handling of command line options */
    if (argc ==  1) { HELP_MSG; }

    while ((c = getopt (argc, argv, "g:w:t:i:l:aphv")) != -1) {
        switch (c)
        {
            case 'h':
                HELP_MSG;
                exit (EXIT_SUCCESS);    
            case 'v':
                VERSION_MSG;
                exit (EXIT_SUCCESS);    
            case 'a':
                printf(TESTS"\n");
                exit (EXIT_SUCCESS);    
            case 'w':
                tmp--;

                if (tmp == -1)
                {
                    fprintf (stderr, "More workgroups configured than allocated!\n");
                    return EXIT_FAILURE;
                }
                if (!test)
                {
                    fprintf (stderr, "You need to specify a test case first!\n");
                    return EXIT_FAILURE;
                }
                testcase = bfromcstr(optarg);
                currentWorkgroup = groups+tmp;  /*FIXME*/
                bstr_to_workgroup(currentWorkgroup, testcase, test->type, test->streams);
                bdestroy(testcase);

                for (i=0; i<  test->streams; i++)
                {
                    if (currentWorkgroup->streams[i].offset%test->stride)
                    {
                        fprintf (stderr, "Stream %d: offset is not a multiple of stride!\n",i);
                        return EXIT_FAILURE;
                    }

                    allocator_allocateVector(&(currentWorkgroup->streams[i].ptr),
                            PAGE_ALIGNMENT,
                            currentWorkgroup->size,
                            currentWorkgroup->streams[i].offset,
                            test->type,
                            currentWorkgroup->streams[i].domain);
                }

                break;
            case 'i':
                iter =  atoi(optarg);
                break;
            case 'l':
                testcase = bfromcstr(optarg);
                for (i=0; i<NUMKERNELS; i++)
                {
                    if (biseqcstr(testcase, kernels[i].name))
                    {
                        test = kernels+i;
                        break;
                    }
                }

                if (biseqcstr(testcase,"none"))
                {
                    fprintf (stderr, "Unknown test case %s\n",optarg);
                    return EXIT_FAILURE;
                }
                else
                {
                    printf("Name: %s\n",test->name);
                    printf("Number of streams: %d\n",test->streams);
                    printf("Loop stride: %d\n",test->stride);
                    printf("Flops: %d\n",test->flops);
                    printf("Bytes: %d\n",test->bytes);
                    switch (test->type)
                    {
                        case SINGLE:
                            printf("Data Type: Single precision float\n");
                            break;
                        case DOUBLE:
                            printf("Data Type: Double precision float\n");
                            break;
                    }
                }
                bdestroy(testcase);
                exit (EXIT_SUCCESS);    

                break;
            case 'p':
                optPrintDomains = 1;
                break;
            case 'g':
                numberOfWorkgroups =  atoi(optarg);
                allocator_init(numberOfWorkgroups * MAX_STREAMS);
                tmp = numberOfWorkgroups;
                groups = (Workgroup*) malloc(numberOfWorkgroups*sizeof(Workgroup));
                break;
            case 't':
                testcase = bfromcstr(optarg);

                for (i=0; i<NUMKERNELS; i++)
                {
                    if (biseqcstr(testcase, kernels[i].name))
                    {
                        test = kernels+i;
                        break;
                    }
                }

                if (biseqcstr(testcase,"none"))
                {
                    fprintf (stderr, "Unknown test case %s\n",optarg);
                    return EXIT_FAILURE;
                }
                bdestroy(testcase);
                break;
            case '?':
                if (isprint (optopt))
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                else
                    fprintf (stderr,
                            "Unknown option character `\\x%x'.\n",
                            optopt);
                return EXIT_FAILURE;
            default:
                HELP_MSG;
        }
    }


    if (optPrintDomains)
    {
        affinity_printDomains();
        exit (EXIT_SUCCESS);    
    }
    timer_init();

 /* :WARNING:05/04/2010 08:58:05 AM:jt: At the moment the thread
  * module only allows equally sized thread groups*/
    for (i=0; i<numberOfWorkgroups; i++)
    {
        globalNumberOfThreads += groups[i].numberOfThreads;
    }

    threads_init(globalNumberOfThreads);
    threads_createGroups(numberOfWorkgroups);

    /* we configure global barriers only */
    barrier_init(1);
    barrier_registerGroup(globalNumberOfThreads);

#ifdef PERFMON
    printf("Using likwid\n");
    likwid_markerInit();
#endif


    /* initialize data structures for threads */
    for (i=0; i<numberOfWorkgroups; i++)
    {
        myData.iter = iter;
        myData.size = groups[i].size;
        myData.test = test;
        myData.numberOfThreads = groups[i].numberOfThreads;
        myData.processors = (int*) malloc(myData.numberOfThreads * sizeof(int));
        myData.streams = (void**) malloc(test->streams * sizeof(void*));

        for (j=0; j<groups[i].numberOfThreads; j++)
        {
            myData.processors[j] = groups[i].processorIds[j];
        }

        for (j=0; j<  test->streams; j++)
        {
            myData.streams[j] = groups[i].streams[j].ptr;
        }

        threads_registerDataGroup(i, &myData, copyThreadData);

        free(myData.processors);
        free(myData.streams);
    }

    printf(HLINE);
    printf("LIKWID MICRO BENCHMARK\n"); 
    printf("Test: %s\n",test->name); 
    printf(HLINE);
    printf("Using %d work groups\n",numberOfWorkgroups);
    printf("Using %d threads\n",globalNumberOfThreads);
    printf(HLINE);

    threads_create(runTest); 
    threads_destroy();
    allocator_finalize();

    time = (double) threads_data[0].cycles / (double) timer_getCpuClock();
    printf("Cycles: %llu \n", LLU_CAST threads_data[0].cycles);
    printf("Iterations: %llu \n", LLU_CAST iter);
    printf("Size: %d \n",  currentWorkgroup->size );
    printf("Vectorlength: %d \n", threads_data[0].data.size);
    printf("Time: %e sec\n", time);
    printf("MFlops/s:\t%.2f\n",
            1.0E-06 * ((double) numberOfWorkgroups * iter * currentWorkgroup->size *  test->flops/  time));
    printf("MByte/s:\t%.2f\n",
            1.0E-06 * ( (double) numberOfWorkgroups * iter * currentWorkgroup->size *  test->bytes/ time));
    printf("Cycles per update:\t%f\n",
            ((double) threads_data[0].cycles / (double) (iter * threads_data[0].data.size)));

	switch ( test->type )
    {
        case SINGLE:
    printf("Cycles per cacheline:\t%f\n",
            (16.0 * (double) threads_data[0].cycles / (double) (iter * threads_data[0].data.size)));
            break;
        case DOUBLE:
    printf("Cycles per cacheline:\t%f\n",
            (8.0 * (double) threads_data[0].cycles / (double) (iter * threads_data[0].data.size)));
            break;
    }

    printf(HLINE);
#ifdef PERFMON
   likwid_markerClose();
#endif

    return EXIT_SUCCESS;
}

コード例 #11

ファイルを表示

int main (int argc, char** argv)
{
    int socket_fd = -1;
    int optInfo = 0;
    int optClock = 0;
    int optStethoscope = 0;
    int optSockets = 0;
    double runtime;
    int hasDRAM = 0;
    int c;
    bstring argString;
    bstring eventString = bfromcstr("CLOCK");
    int numSockets=1;
    int numThreads=0;
    int threadsSockets[MAX_NUM_NODES*2];
    int threads[MAX_NUM_THREADS];

    threadsSockets[0] = 0;
    
    if (argc == 1)
    {
    	HELP_MSG;
    	exit (EXIT_SUCCESS);
    }

    while ((c = getopt (argc, argv, "+c:hiM:ps:v")) != -1)
    {
        switch (c)
        {
            case 'c':
                CHECK_OPTION_STRING;
                numSockets = bstr_to_cpuset_physical((uint32_t*) threadsSockets, argString);
                bdestroy(argString);
                optSockets = 1;
                break;

            case 'h':
                HELP_MSG;
                exit (EXIT_SUCCESS);
            case 'i':
                optInfo = 1;
                break;
            case 'M':  /* Set MSR Access mode */
                CHECK_OPTION_STRING;
                accessClient_setaccessmode(str2int((char*) argString->data));
                bdestroy(argString);
                break;
            case 'p':
                optClock = 1;
                break;
            case 's':
                CHECK_OPTION_STRING;
                optStethoscope = str2int((char*) argString->data);
                bdestroy(argString);
                break;
            case 'v':
                VERSION_MSG;
                exit (EXIT_SUCCESS);
            case '?':
            	if (optopt == 's' || optopt == 'M' || optopt == 'c')
            	{
            		HELP_MSG;
            	}
                else if (isprint (optopt))
                {
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                }
                else
                {
                    fprintf (stderr,
                            "Unknown option character `\\x%x'.\n",
                            optopt);
                }
                exit( EXIT_FAILURE);
            default:
                HELP_MSG;
                exit (EXIT_SUCCESS);
        }
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }
    
    if (optClock && optind == argc)
    {
    	fprintf(stderr,"Commandline option -p requires an executable.\n");
    	exit(EXIT_FAILURE);
    }
    if (optSockets && !optStethoscope && optind == argc)
    {
    	fprintf(stderr,"Commandline option -c requires an executable if not used in combination with -s.\n");
    	exit(EXIT_FAILURE);
    }

    if (cpuid_init() == EXIT_FAILURE)
    {
        fprintf(stderr, "CPU not supported\n");
        exit(EXIT_FAILURE);
    }
    
    if (numSockets > cpuid_topology.numSockets)
    {
    	fprintf(stderr, "System has only %d sockets but %d are given on commandline\n",
    			cpuid_topology.numSockets, numSockets);
    	exit(EXIT_FAILURE);
    }

    numa_init(); /* consider NUMA node as power unit for the moment */
    accessClient_init(&socket_fd);
    msr_init(socket_fd);
    timer_init();

    /* check for supported processors */
    if ((cpuid_info.model == SANDYBRIDGE_EP) ||
            (cpuid_info.model == SANDYBRIDGE) ||
            (cpuid_info.model == IVYBRIDGE) ||
            (cpuid_info.model == IVYBRIDGE_EP) ||
            (cpuid_info.model == HASWELL) ||
            (cpuid_info.model == NEHALEM_BLOOMFIELD) ||
            (cpuid_info.model == NEHALEM_LYNNFIELD) ||
            (cpuid_info.model == NEHALEM_WESTMERE))
    {
        power_init(numa_info.nodes[0].processors[0]);
    }
    else
    {
        fprintf (stderr, "Query Turbo Mode only supported on Intel Nehalem/Westmere/SandyBridge/IvyBridge/Haswell processors!\n");
        exit(EXIT_FAILURE);
    }

    double clock = (double) timer_getCpuClock();

    printf(HLINE);
    printf("CPU name:\t%s \n",cpuid_info.name);
    printf("CPU clock:\t%3.2f GHz \n",  (float) clock * 1.E-09);
    printf(HLINE);

    if (optInfo)
    {
        if (power_info.turbo.numSteps != 0)
        {
            printf("Base clock:\t%.2f MHz \n",  power_info.baseFrequency );
            printf("Minimal clock:\t%.2f MHz \n",  power_info.minFrequency );
            printf("Turbo Boost Steps:\n");
            for (int i=0; i < power_info.turbo.numSteps; i++ )
            {
                printf("C%d %.2f MHz \n",i+1,  power_info.turbo.steps[i] );
            }
        }
        printf(HLINE);
    }

    if (cpuid_info.model == SANDYBRIDGE_EP)
    {
        hasDRAM = 1;
    }
    else if ((cpuid_info.model != SANDYBRIDGE) &&
            (cpuid_info.model != SANDYBRIDGE_EP)  &&
            (cpuid_info.model != IVYBRIDGE)  &&
            (cpuid_info.model != IVYBRIDGE_EP)  &&
            (cpuid_info.model != HASWELL))
    {
        fprintf (stderr, "RAPL not supported on this processor!\n");
        exit(EXIT_FAILURE);
    }

    if (optInfo)
    {
        printf("Thermal Spec Power: %g Watts \n", power_info.tdp );
        printf("Minimum  Power: %g Watts \n", power_info.minPower);
        printf("Maximum  Power: %g Watts \n", power_info.maxPower);
        printf("Maximum  Time Window: %g micro sec \n", power_info.maxTimeWindow);
        printf(HLINE);
        exit(EXIT_SUCCESS);
    }

    if (optClock)
    {
        affinity_init();
        argString = bformat("S%u:0-%u", threadsSockets[0], cpuid_topology.numCoresPerSocket-1);
        for (int i=1; i<numSockets; i++)
        {
            bstring tExpr = bformat("@S%u:0-%u", threadsSockets[i], cpuid_topology.numCoresPerSocket-1);
            bconcat(argString, tExpr);
        }
        numThreads = bstr_to_cpuset(threads, argString);
        bdestroy(argString);
        perfmon_init(numThreads, threads, stdout);
        perfmon_setupEventSet(eventString, NULL);
    }

    {
        PowerData pDataPkg[MAX_NUM_NODES*2];
        PowerData pDataDram[MAX_NUM_NODES*2];
        printf("Measure on sockets: %d", threadsSockets[0]);
        for (int i=1; i<numSockets; i++)
        {
            printf(", %d", threadsSockets[i]);
        }
        printf("\n");

        if (optStethoscope)
        {
            if (optClock)
            {
                perfmon_startCounters();
            }
            else
            {
                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM);
                    power_start(pDataPkg+i, cpuId, PKG);
                }
            }
            sleep(optStethoscope);

            if (optClock)
            {
                perfmon_stopCounters();
                perfmon_printCounterResults();
                perfmon_finalize();
            }
            else
            {
                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    power_stop(pDataPkg+i, cpuId, PKG);
                    if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM);
                }
            }
            runtime = (double) optStethoscope;
        }
        else
        {
            TimerData time;
            argv +=  optind;
            bstring exeString = bfromcstr(argv[0]);

            for (int i=1; i<(argc-optind); i++)
            {
                bconchar(exeString, ' ');
                bcatcstr(exeString, argv[i]);
            }
            printf("%s\n",bdata(exeString));


            if (optClock)
            {
                perfmon_startCounters();
            }
            else
            {
                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM);
                    power_start(pDataPkg+i, cpuId, PKG);
                }

                timer_start(&time);
            }

            if (system(bdata(exeString)) == EOF)
            {
                fprintf(stderr, "Failed to execute %s!\n", bdata(exeString));
                exit(EXIT_FAILURE);
            }

            if (optClock)
            {
                perfmon_stopCounters();
                perfmon_printCounterResults();
                perfmon_finalize();
            }
            else
            {
                timer_stop(&time);

                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    power_stop(pDataPkg+i, cpuId, PKG);
                    if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM);
                }
                runtime = timer_print(&time);
            }
        }

        if (!optClock)
        {
            printf("Runtime: %g second \n",runtime);
            printf(HLINE);
            for (int i=0; i<numSockets; i++)
            {
                printf("Socket %d\n",threadsSockets[i]);
                printf("Domain: PKG \n");
                printf("Energy consumed: %g Joules \n", power_printEnergy(pDataPkg+i));
                printf("Power consumed: %g Watts \n", power_printEnergy(pDataPkg+i) / runtime );
                if (hasDRAM)
                {
                    printf("Domain: DRAM \n");
                    printf("Energy consumed: %g Joules \n", power_printEnergy(pDataDram+i));
                    printf("Power consumed: %g Watts \n", power_printEnergy(pDataDram+i) / runtime );
                }
                printf("\n");
            }
        }
    }

#if 0
    if ( cpuid_hasFeature(TM2) )
    {
        thermal_init(0);
        printf("Current core temperatures:\n");

        for (uint32_t i = 0; i < cpuid_topology.numCoresPerSocket; i++ )
        {
            printf("Core %d: %u C\n",
                    numa_info.nodes[socketId].processors[i],
                    thermal_read(numa_info.nodes[socketId].processors[i]));
        }
    }
#endif

    msr_finalize();
    return EXIT_SUCCESS;
}

コード例 #12

ファイルを表示

ファイル: affinity.c プロジェクト: haibo031031/likwid

void
affinity_init()
{
    int numberOfDomains = 1; /* all systems have the node domain */
    int currentDomain;
    int subCounter = 0;
    int offset = 0;
    int tmp;
    if (affinity_initialized == 1)
    {
        return;
    }
    topology_init();
    int numberOfSocketDomains = cpuid_topology.numSockets;
    DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity: Socket domains %d, numberOfSocketDomains);
    numa_init();
    int numberOfNumaDomains = numa_info.numberOfNodes;
    DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity: NUMA domains %d, numberOfNumaDomains);
    int numberOfProcessorsPerSocket =
        cpuid_topology.numCoresPerSocket * cpuid_topology.numThreadsPerCore;
    DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity: CPUs per socket %d, numberOfProcessorsPerSocket);
    int numberOfCacheDomains;

    int numberOfCoresPerCache =
        cpuid_topology.cacheLevels[cpuid_topology.numCacheLevels-1].threads/
        cpuid_topology.numThreadsPerCore;
    DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity: CPU cores per LLC %d, numberOfCoresPerCache);

    int numberOfProcessorsPerCache =
        cpuid_topology.cacheLevels[cpuid_topology.numCacheLevels-1].threads;
    DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity: CPUs per LLC %d, numberOfProcessorsPerCache);
    /* for the cache domain take only into account last level cache and assume
     * all sockets to be uniform. */

    /* determine how many last level shared caches exist per socket */
    numberOfCacheDomains = cpuid_topology.numSockets *
        (cpuid_topology.numCoresPerSocket/numberOfCoresPerCache);
    DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity: Cache domains %d, numberOfCacheDomains);
    /* determine total number of domains */
    numberOfDomains += numberOfSocketDomains + numberOfCacheDomains + numberOfNumaDomains;
    DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity: All domains %d, numberOfDomains);
    domains = (AffinityDomain*) malloc(numberOfDomains * sizeof(AffinityDomain));
    if (!domains)
    {
        fprintf(stderr,"No more memory for %ld bytes for array of affinity domains\n",numberOfDomains * sizeof(AffinityDomain));
        return;
    }

    /* Node domain */
    domains[0].numberOfProcessors = cpuid_topology.activeHWThreads;
    domains[0].numberOfCores = cpuid_topology.numSockets * cpuid_topology.numCoresPerSocket;
    DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity domain N: %d HW threads on %d cores, domains[0].numberOfProcessors, domains[0].numberOfCores);
    domains[0].tag = bformat("N");
    domains[0].processorList = (int*) malloc(cpuid_topology.numHWThreads*sizeof(int));
    if (!domains[0].processorList)
    {
        fprintf(stderr,"No more memory for %ld bytes for processor list of affinity domain %s\n",
                cpuid_topology.numHWThreads*sizeof(int), 
                bdata(domains[0].tag));
        return;
    }
    offset = 0;

    if (numberOfSocketDomains > 1)
    {
        for (int i=0; i<numberOfSocketDomains; i++)
        {
          tmp = treeFillNextEntries(cpuid_topology.topologyTree,
                                    domains[0].processorList + offset,
                                    i, 0, numberOfProcessorsPerSocket);
          offset += tmp;
        }
    }
    else
    {
        tmp = treeFillNextEntries(cpuid_topology.topologyTree,
                                  domains[0].processorList,
                                  0, 0, domains[0].numberOfProcessors);
        domains[0].numberOfProcessors = tmp;
    }

    /* Socket domains */
    currentDomain = 1;
    for (int i=0; i < numberOfSocketDomains; i++ )
    {
        domains[currentDomain + i].numberOfProcessors = numberOfProcessorsPerSocket;
        domains[currentDomain + i].numberOfCores =  cpuid_topology.numCoresPerSocket;
        domains[currentDomain + i].tag = bformat("S%d", i);
        DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity domain S%d: %d HW threads on %d cores, i, domains[currentDomain + i].numberOfProcessors, domains[currentDomain + i].numberOfCores);
        domains[currentDomain + i].processorList = (int*) malloc( domains[currentDomain + i].numberOfProcessors * sizeof(int));
        if (!domains[currentDomain + i].processorList)   
        {
            fprintf(stderr,"No more memory for %ld bytes for processor list of affinity domain %s\n",
                    domains[currentDomain + i].numberOfProcessors * sizeof(int),
                    bdata(domains[currentDomain + i].tag));
            return;
        }

        tmp = treeFillNextEntries(cpuid_topology.topologyTree,
                                  domains[currentDomain + i].processorList,
                                  i, 0, domains[currentDomain + i].numberOfProcessors);
        for ( int j = 0; j < tmp; j++ )
        {
            affinity_core2node_lookup[domains[currentDomain + i].processorList[j]] = i;
        }
        domains[currentDomain + i].numberOfProcessors = tmp;
    }

    /* Cache domains */
    currentDomain += numberOfSocketDomains;
    subCounter = 0;
    for (int i=0; i < numberOfSocketDomains; i++ )
    {
        offset = 0;

        for ( int j=0; j < (numberOfCacheDomains/numberOfSocketDomains); j++ )
        {
            domains[currentDomain + subCounter].numberOfProcessors = numberOfProcessorsPerCache;
            domains[currentDomain + subCounter].numberOfCores =  numberOfCoresPerCache;
            domains[currentDomain + subCounter].tag = bformat("C%d", subCounter);
            DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity domain C%d: %d HW threads on %d cores, subCounter, domains[currentDomain + subCounter].numberOfProcessors, domains[currentDomain + subCounter].numberOfCores);
            domains[currentDomain + subCounter].processorList = (int*) malloc(numberOfProcessorsPerCache*sizeof(int));
            if (!domains[currentDomain + subCounter].processorList)   
            {
                fprintf(stderr,"No more memory for %ld bytes for processor list of affinity domain %s\n",
                        numberOfProcessorsPerCache*sizeof(int),
                        bdata(domains[currentDomain + subCounter].tag));
                return;
            }

            tmp = treeFillNextEntries(cpuid_topology.topologyTree,
                                      domains[currentDomain + subCounter].processorList,
                                      i, offset,
                                      domains[currentDomain + subCounter].numberOfProcessors);
            domains[currentDomain + subCounter].numberOfProcessors = tmp;
            offset += (tmp < numberOfCoresPerCache ? tmp : numberOfCoresPerCache);
            subCounter++;
        }
    }
    /* Memory domains */
    currentDomain += numberOfCacheDomains;
    subCounter = 0;
    if ((numberOfNumaDomains >= numberOfSocketDomains) && (numberOfNumaDomains > 1))
    {
        for (int i=0; i < numberOfSocketDomains; i++ )
        {
            offset = 0;
            for ( int j=0; j < (int)ceil((double)(numberOfNumaDomains)/numberOfSocketDomains); j++ )
            {
                domains[currentDomain + subCounter].numberOfProcessors =
                                numa_info.nodes[subCounter].numberOfProcessors;
                domains[currentDomain + subCounter].numberOfCores =
                                numa_info.nodes[subCounter].numberOfProcessors/cpuid_topology.numThreadsPerCore;
                domains[currentDomain + subCounter].tag = bformat("M%d", subCounter);
                DEBUG_PRINT(DEBUGLEV_DEVELOP, Affinity domain M%d: %d HW threads on %d cores, subCounter, domains[currentDomain + subCounter].numberOfProcessors, domains[currentDomain + subCounter].numberOfCores);
                domains[currentDomain + subCounter].processorList =
                                (int*) malloc(numa_info.nodes[subCounter].numberOfProcessors*sizeof(int));
                if (!domains[currentDomain + subCounter].processorList)
                {
                    fprintf(stderr,"No more memory for %ld bytes for processor list of affinity domain %s\n",
                            numa_info.nodes[subCounter].numberOfProcessors*sizeof(int),
                            bdata(domains[currentDomain + subCounter].tag));
                    return;
                }

                tmp = treeFillNextEntries(cpuid_topology.topologyTree,
                                          domains[currentDomain + subCounter].processorList,
                                          i, offset,
                                          domains[currentDomain + subCounter].numberOfProcessors);
                domains[currentDomain + subCounter].numberOfProcessors = tmp;
                offset += domains[currentDomain + subCounter].numberOfCores;
                subCounter++;
            }
        }
    }
    else
    {

コード例 #13

ファイルを表示

ファイル: init.c プロジェクト: Aresthu/ucore_plus

int kern_init(uint64_t mbmagic, uint64_t mbmem)
{
	extern char edata[], end[];
	memset(edata, 0, end - edata);

	/* percpu variable for CPU0 is preallocated */
	percpu_offsets[0] = __percpu_start;

	cons_init();		// init the console

	const char *message = "(THU.CST) os is loading ...";
	kprintf("%s\n\n", message);
	if(mbmagic == MULTIBOOT_BOOTLOADER_MAGIC){
		kprintf("Multiboot dectected: param %p\n", (void*)mbmem);
		mbmem2e820((Mbdata*)VADDR_DIRECT(mbmem));
		parse_initrd((Mbdata*)VADDR_DIRECT(mbmem));
	}

	print_kerninfo();

	/* get_cpu_var not available before tls_init() */
	hz_init();
	gdt_init(per_cpu_ptr(cpus, 0));
	tls_init(per_cpu_ptr(cpus, 0));
	acpitables_init();
	lapic_init();
	numa_init();

	pmm_init_numa();		// init physical memory management, numa awared
	/* map the lapic */
	lapic_init_late();

	//init the acpi stuff

	idt_init();		// init interrupt descriptor table
	pic_init();		// init interrupt controller

//	acpi_conf_init();


	percpu_init();
	cpus_init();
#ifdef UCONFIG_ENABLE_IPI
	ipi_init();
#endif

	refcache_init();

	vmm_init();		// init virtual memory management
	sched_init();		// init scheduler
	proc_init();		// init process table
	sync_init();		// init sync struct

	/* ext int */
	ioapic_init();
	acpi_init();

	ide_init();		// init ide devices
#ifdef UCONFIG_SWAP
	swap_init();		// init swap
#endif
	fs_init();		// init fs

	clock_init();		// init clock interrupt
	mod_init();

	trap_init();

	//XXX put here?
	bootaps();

	intr_enable();		// enable irq interrupt

#ifdef UCONFIG_HAVE_LINUX_DDE36_BASE
	dde_kit_init();
#endif

	/* do nothing */
	cpu_idle();		// run idle process
}

コード例 #14

ファイルを表示

ファイル: likwid-bench.c プロジェクト: app-genesis/likwid

int main(int argc, char** argv)
{
    uint64_t iter = 100;
    uint32_t i;
    uint32_t j;
    int globalNumberOfThreads = 0;
    int optPrintDomains = 0;
    int c;
    ThreadUserData myData;
    bstring testcase = bfromcstr("none");
    uint64_t numberOfWorkgroups = 0;
    int tmp = 0;
    double time;
    double cycPerUp = 0.0;
    const TestCase* test = NULL;
    uint64_t realSize = 0;
    uint64_t realIter = 0;
    uint64_t maxCycles = 0;
    uint64_t minCycles = UINT64_MAX;
    uint64_t cyclesClock = 0;
    uint64_t demandIter = 0;
    TimerData itertime;
    Workgroup* currentWorkgroup = NULL;
    Workgroup* groups = NULL;
    uint32_t min_runtime = 1; /* 1s */
    bstring HLINE = bfromcstr("");
    binsertch(HLINE, 0, 80, '-');
    binsertch(HLINE, 80, 1, '\n');
    int (*ownprintf)(const char *format, ...);
    ownprintf = &printf;

    /* Handling of command line options */
    if (argc ==  1)
    {
        HELP_MSG;
        exit(EXIT_SUCCESS);
    }

    while ((c = getopt (argc, argv, "w:t:s:l:aphvi:")) != -1) {
        switch (c)
        {
            case 'h':
                HELP_MSG;
                exit (EXIT_SUCCESS);
            case 'v':
                VERSION_MSG;
                exit (EXIT_SUCCESS);
            case 'a':
                ownprintf(TESTS"\n");
                exit (EXIT_SUCCESS);
            case 'w':
                numberOfWorkgroups++;
                break;
            case 's':
                min_runtime = atoi(optarg);
                break;
            case 'i':
                demandIter = strtoul(optarg, NULL, 10);
                if (demandIter <= 0)
                {
                    fprintf (stderr, "Error: Iterations must be greater than 0\n");
                    return EXIT_FAILURE;
                }
                break;
            case 'l':
                bdestroy(testcase);
                testcase = bfromcstr(optarg);
                for (i=0; i<NUMKERNELS; i++)
                {
                    if (biseqcstr(testcase, kernels[i].name))
                    {
                        test = kernels+i;
                        break;
                    }
                }

                if (test == NULL)
                {
                    fprintf (stderr, "Error: Unknown test case %s\n",optarg);
                    return EXIT_FAILURE;
                }
                else
                {
                    ownprintf("Name: %s\n",test->name);
                    ownprintf("Number of streams: %d\n",test->streams);
                    ownprintf("Loop stride: %d\n",test->stride);
                    ownprintf("Flops: %d\n",test->flops);
                    ownprintf("Bytes: %d\n",test->bytes);
                    switch (test->type)
                    {
                        case INT:
                            ownprintf("Data Type: Integer\n");
                            break;
                        case SINGLE:
                            ownprintf("Data Type: Single precision float\n");
                            break;
                        case DOUBLE:
                            ownprintf("Data Type: Double precision float\n");
                            break;
                    }
                    if (test->loads >= 0)
                    {
                        ownprintf("Load Ops: %d\n",test->loads);
                    }
                    if (test->stores >= 0)
                    {
                        ownprintf("Store Ops: %d\n",test->stores);
                    }
                    if (test->branches >= 0)
                    {
                        ownprintf("Branches: %d\n",test->branches);
                    }
                    if (test->instr_const >= 0)
                    {
                        ownprintf("Constant instructions: %d\n",test->instr_const);
                    }
                    if (test->instr_loop >= 0)
                    {
                        ownprintf("Loop instructions: %d\n",test->instr_loop);
                    }
                }
                bdestroy(testcase);
                exit (EXIT_SUCCESS);

                break;
            case 'p':
                optPrintDomains = 1;
                break;
            case 'g':
                numberOfWorkgroups = LLU_CAST atol(optarg);

                tmp = numberOfWorkgroups;

                break;
            case 't':
                bdestroy(testcase);
                testcase = bfromcstr(optarg);

                for (i=0; i<NUMKERNELS; i++)
                {
                    if (biseqcstr(testcase, kernels[i].name))
                    {
                        test = kernels+i;
                        break;
                    }
                }

                if (test == NULL)
                {
                    fprintf (stderr, "Error: Unknown test case %s\n",optarg);
                    return EXIT_FAILURE;
                }
                bdestroy(testcase);
                break;
            case '?':
                if (isprint (optopt))
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                else
                    fprintf (stderr,
                            "Unknown option character `\\x%x'.\n",
                            optopt);
                return EXIT_FAILURE;
            default:
                HELP_MSG;
        }
    }
    if ((numberOfWorkgroups == 0) && (!optPrintDomains))
    {
        fprintf(stderr, "Error: At least one workgroup (-w) must be set on commandline\n");
        exit (EXIT_FAILURE);
    }

    if (topology_init() != EXIT_SUCCESS)
    {
        fprintf(stderr, "Error: Unsupported processor!\n");
        exit(EXIT_FAILURE);
    }

    if ((test == NULL) && (!optPrintDomains))
    {
        fprintf(stderr, "Unknown test case. Please check likwid-bench -a for available tests\n");
        fprintf(stderr, "and select one using the -t commandline option\n");
        exit(EXIT_FAILURE);
    }

    numa_init();
    affinity_init();
    timer_init();

    if (optPrintDomains)
    {
        bdestroy(testcase);
        AffinityDomains_t affinity = get_affinityDomains();
        ownprintf("Number of Domains %d\n",affinity->numberOfAffinityDomains);
        for (i=0; i < affinity->numberOfAffinityDomains; i++ )
        {
            ownprintf("Domain %d:\n",i);
            ownprintf("\tTag %s:",bdata(affinity->domains[i].tag));

            for ( uint32_t j=0; j < affinity->domains[i].numberOfProcessors; j++ )
            {
                ownprintf(" %d",affinity->domains[i].processorList[j]);
            }
            ownprintf("\n");
        }
        exit (EXIT_SUCCESS);
    }

    allocator_init(numberOfWorkgroups * MAX_STREAMS);
    groups = (Workgroup*) malloc(numberOfWorkgroups*sizeof(Workgroup));
    tmp = 0;

    optind = 0;
    while ((c = getopt (argc, argv, "w:t:s:l:i:aphv")) != -1)
    {
        switch (c)
        {
            case 'w':
                currentWorkgroup = groups+tmp;
                bstring groupstr = bfromcstr(optarg);
                i = bstr_to_workgroup(currentWorkgroup, groupstr, test->type, test->streams);
                bdestroy(groupstr);
                if (i == 0)
                {
                    for (i=0; i<  test->streams; i++)
                    {
                        if (currentWorkgroup->streams[i].offset%test->stride)
                        {
                            fprintf (stderr, "Error: Stream %d: offset is not a multiple of stride!\n",i);
                            return EXIT_FAILURE;
                        }
                        allocator_allocateVector(&(currentWorkgroup->streams[i].ptr),
                                PAGE_ALIGNMENT,
                                currentWorkgroup->size,
                                currentWorkgroup->streams[i].offset,
                                test->type,
                                currentWorkgroup->streams[i].domain);
                    }
                    tmp++;
                }
                else
                {
                    exit(EXIT_FAILURE);
                }
                break;
            default:
                continue;
                break;
        }
    }

    /* :WARNING:05/04/2010 08:58:05 AM:jt: At the moment the thread
     * module only allows equally sized thread groups*/
    for (i=0; i<numberOfWorkgroups; i++)
    {
        globalNumberOfThreads += groups[i].numberOfThreads;
    }

    ownprintf(bdata(HLINE));
    ownprintf("LIKWID MICRO BENCHMARK\n");
    ownprintf("Test: %s\n",test->name);
    ownprintf(bdata(HLINE));
    ownprintf("Using %" PRIu64 " work groups\n",numberOfWorkgroups);
    ownprintf("Using %d threads\n",globalNumberOfThreads);
    ownprintf(bdata(HLINE));


    threads_init(globalNumberOfThreads);
    threads_createGroups(numberOfWorkgroups);

    /* we configure global barriers only */
    barrier_init(1);
    barrier_registerGroup(globalNumberOfThreads);
    cyclesClock = timer_getCycleClock();

#ifdef LIKWID_PERFMON
    if (getenv("LIKWID_FILEPATH") != NULL)
    {
        ownprintf("Using Likwid Marker API\n");
    }
    LIKWID_MARKER_INIT;
    ownprintf(bdata(HLINE));
#endif


    /* initialize data structures for threads */
    for (i=0; i<numberOfWorkgroups; i++)
    {
        myData.iter = iter;
        if (demandIter > 0)
        {
            myData.iter = demandIter;
        }
        myData.min_runtime = min_runtime;
        myData.size = groups[i].size;
        myData.test = test;
        myData.cycles = 0;
        myData.numberOfThreads = groups[i].numberOfThreads;
        myData.processors = (int*) malloc(myData.numberOfThreads * sizeof(int));
        myData.streams = (void**) malloc(test->streams * sizeof(void*));

        for (j=0; j<groups[i].numberOfThreads; j++)
        {
            myData.processors[j] = groups[i].processorIds[j];
        }

        for (j=0; j<  test->streams; j++)
        {
            myData.streams[j] = groups[i].streams[j].ptr;
        }

        threads_registerDataGroup(i, &myData, copyThreadData);

        free(myData.processors);
        free(myData.streams);
    }

    if (demandIter == 0)
    {
        getIterSingle((void*) &threads_data[0]);
        for (i=0; i<numberOfWorkgroups; i++)
        {
            iter = threads_updateIterations(i, demandIter);
        }
    }
#ifdef DEBUG_LIKWID
    else
    {
        ownprintf("Using manually selected iterations per thread\n");
    }
#endif

    threads_create(runTest);
    threads_join();

    for (int i=0; i<globalNumberOfThreads; i++)
    {
        realSize += threads_data[i].data.size;
        realIter += threads_data[i].data.iter;
        if (threads_data[i].cycles > maxCycles)
        {
            maxCycles = threads_data[i].cycles;
        }
        if (threads_data[i].cycles < minCycles)
        {
            minCycles = threads_data[i].cycles;
        }
    }



    time = (double) maxCycles / (double) cyclesClock;
    ownprintf(bdata(HLINE));
    ownprintf("Cycles:\t\t\t%" PRIu64 "\n", maxCycles);
    ownprintf("CPU Clock:\t\t%" PRIu64 "\n", timer_getCpuClock());
    ownprintf("Cycle Clock:\t\t%" PRIu64 "\n", cyclesClock);
    ownprintf("Time:\t\t\t%e sec\n", time);
    ownprintf("Iterations:\t\t%" PRIu64 "\n", realIter);
    ownprintf("Iterations per thread:\t%" PRIu64 "\n",threads_data[0].data.iter);
    ownprintf("Inner loop executions:\t%.0f\n", ((double)realSize)/((double)test->stride));
    ownprintf("Size:\t\t\t%" PRIu64 "\n",  realSize*test->bytes );
    ownprintf("Size per thread:\t%" PRIu64 "\n", threads_data[0].data.size*test->bytes);
    ownprintf("Number of Flops:\t%" PRIu64 "\n", (threads_data[0].data.iter * realSize *  test->flops));
    ownprintf("MFlops/s:\t\t%.2f\n",
            1.0E-06 * ((double) threads_data[0].data.iter * realSize *  test->flops/  time));
    
    ownprintf("Data volume (Byte):\t%llu\n", LLU_CAST (threads_data[0].data.iter * realSize *  test->bytes));
    ownprintf("MByte/s:\t\t%.2f\n",
            1.0E-06 * ( (double) threads_data[0].data.iter * realSize *  test->bytes/ time));

    cycPerUp = ((double) maxCycles / (double) (threads_data[0].data.iter * realSize));
    ownprintf("Cycles per update:\t%f\n", cycPerUp);

    switch ( test->type )
    {
        case INT:
        case SINGLE:
            ownprintf("Cycles per cacheline:\t%f\n", (16.0 * cycPerUp));
            break;
        case DOUBLE:
            ownprintf("Cycles per cacheline:\t%f\n", (8.0 * cycPerUp));
            break;
    }
    ownprintf("Loads per update:\t%ld\n", test->loads );
    ownprintf("Stores per update:\t%ld\n", test->stores );
    if ((test->loads > 0) && (test->stores > 0))
    {
        ownprintf("Load/store ratio:\t%.2f\n", ((double)test->loads)/((double)test->stores) );
    }
    if ((test->instr_loop > 0) && (test->instr_const > 0))
    {
        ownprintf("Instructions:\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->instr_loop*threads_data[0].data.iter + test->instr_const );
    }
    if (test->uops > 0)
    {
        ownprintf("UOPs:\t\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->uops*threads_data[0].data.iter);
    }

    ownprintf(bdata(HLINE));
    threads_destroy(numberOfWorkgroups, test->streams);
    allocator_finalize();
    workgroups_destroy(&groups, numberOfWorkgroups, test->streams);

#ifdef LIKWID_PERFMON
    if (getenv("LIKWID_FILEPATH") != NULL)
    {
        ownprintf("Writing Likwid Marker API results to file %s\n", getenv("LIKWID_FILEPATH"));
    }
    LIKWID_MARKER_CLOSE;
#endif

    bdestroy(HLINE);
    return EXIT_SUCCESS;
}