Exemplo n.º 1
0
static int lua_likwid_pinProcess(lua_State* L)
{
    int cpuID = luaL_checknumber(L,-2);
    int silent = luaL_checknumber(L,-1);
    luaL_argcheck(L, cpuID >= 0, 1, "CPU ID must be greater or equal 0");
    if (affinity_isInitialized == 0)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    affinity_pinProcess(cpuID);
    if (!silent)
    {
#ifdef COLOR
            color_on(BRIGHT, COLOR);
#endif
            printf("[likwid-pin] Main PID -> core %d - OK",  cpuID);
#ifdef COLOR
            color_reset();
#endif
            printf("\n");
    }
    return 0;
}
Exemplo n.º 2
0
static int cpustr_to_cpulist_scatter(bstring bcpustr, int* cpulist, int length)
{
    topology_init();
    CpuTopology_t cpuid_topology = get_cpuTopology();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    char* cpustring = bstr2cstr(bcpustr, '\0');
    if (bstrchrp(bcpustr, ':', 0) != BSTR_ERR)
    {
        int insert = 0;
        int suitidx = 0;
        int* suitable = (int*)malloc(affinity->numberOfAffinityDomains*sizeof(int));
        if (!suitable)
        {
            bcstrfree(cpustring);
            return -ENOMEM;
        }
        for (int i=0; i<affinity->numberOfAffinityDomains; i++)
        {
            if (bstrchrp(affinity->domains[i].tag, cpustring[0], 0) != BSTR_ERR)
            {
                suitable[suitidx] = i;
                suitidx++;
            }
        }
        int* sortedList = (int*) malloc(affinity->domains[suitable[0]].numberOfProcessors * sizeof(int));
        if (!sortedList)
        {
            free(suitable);
            bcstrfree(cpustring);
            return -ENOMEM;
        }
        for (int off=0;off<affinity->domains[suitable[0]].numberOfProcessors;off++)
        {
            for(int i=0;i < suitidx; i++)
            {
                cpulist_sort(affinity->domains[suitable[i]].processorList, sortedList, affinity->domains[suitable[i]].numberOfProcessors);
                cpulist[insert] = sortedList[off];
                insert++;
                if (insert == length)
                    goto scatter_done;
            }
        }
scatter_done:
        bcstrfree(cpustring);
        free(sortedList);
        free(suitable);
        return insert;
    }
    bcstrfree(cpustring);
    return 0;
}
Exemplo n.º 3
0
static int cpu_in_domain(int domainidx, int cpu)
{
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    for (int i=0;i<affinity->domains[domainidx].numberOfProcessors; i++)
    {
        if (cpu == affinity->domains[domainidx].processorList[i])
        {
            return 1;
        }
    }
    return 0;
}
Exemplo n.º 4
0
int test_affinityinit()
{
    int i = 0;
    topology_init();
    CpuTopology_t cputopo = get_cpuTopology();
    numa_init();
    affinity_init();
    AffinityDomains_t doms = get_affinityDomains();
    if (doms == NULL)
        goto fail;
    if (doms->numberOfSocketDomains != cputopo->numSockets)
        goto fail;
    if (doms->numberOfNumaDomains == 0)
        goto fail;
    if (doms->numberOfProcessorsPerSocket == 0)
        goto fail;
    if (doms->numberOfAffinityDomains == 0)
        goto fail;
    if (doms->numberOfCacheDomains == 0)
        goto fail;
    if (doms->numberOfCoresPerCache == 0)
        goto fail;
    if (doms->numberOfProcessorsPerCache == 0)
        goto fail;
    if (doms->numberOfProcessorsPerCache < doms->numberOfCoresPerCache)
        goto fail;
    if (doms->domains == NULL)
        goto fail;
    for (i = 0; i < doms->numberOfAffinityDomains; i++)
    {
        if (doms->domains[i].numberOfProcessors == 0)
            goto fail;
        if (doms->domains[i].numberOfCores == 0)
            goto fail;
        if (doms->domains[i].numberOfProcessors < doms->domains[i].numberOfCores)
            goto fail;
        if (doms->domains[i].processorList == NULL)
            goto fail;
    }
    affinity_finalize();
    topology_finalize();
    return 1;
fail:
    affinity_finalize();
    topology_finalize();
    return 0;
}
Exemplo n.º 5
0
static int cpuexpr_to_list(bstring bcpustr, bstring prefix, int* list, int length)
{
    topology_init();
    CpuTopology_t cpuid_topology = get_cpuTopology();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    struct bstrList* strlist = bstrListCreate();
    strlist = bsplit(bcpustr, ',');
    int oldinsert = 0;
    int insert = 0;
    for (int i=0;i < strlist->qty; i++)
    {
        bstring newstr = bstrcpy(prefix);
        bconcat(newstr, strlist->entry[i]);
        oldinsert = insert;
        for (int j = 0; j < affinity->numberOfAffinityDomains; j++)
        {
            if (bstrcmp(affinity->domains[j].tag, newstr) == 0)
            {
                list[insert] = atoi(bdata(strlist->entry[i]));
                insert++;
                if (insert == length)
                    goto list_done;
                break;
            }
        }
        if (insert == oldinsert)
        {
            fprintf(stderr,"Domain %s cannot be found\n", bdata(newstr));
        }
        bdestroy(newstr);
    }
list_done:
    bstrListDestroy(strlist);
    return insert;
}
Exemplo n.º 6
0
static int lua_likwid_getAffinityInfo(lua_State* L)
{
    int i,j;
    
    if (topology_isInitialized == 0)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if ((topology_isInitialized) && (cpuinfo == NULL))
    {
        cpuinfo = get_cpuInfo();
    }
    if ((topology_isInitialized) && (cputopo == NULL))
    {
        cputopo = get_cpuTopology();
    }
    if (numa_isInitialized == 0)
    {
        if (numa_init() == 0)
        {
            numa_isInitialized = 1;
            numainfo = get_numaTopology();
        }
    }
    if ((numa_isInitialized) && (numainfo == NULL))
    {
        numainfo = get_numaTopology();
    }
    if (affinity_isInitialized == 0)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    if ((affinity_isInitialized) && (affinity == NULL))
    {
        affinity = get_affinityDomains();
    }

    if (!affinity)
    {
        lua_pushstring(L,"Cannot initialize affinity groups");
        lua_error(L);
    }
    lua_newtable(L);
    lua_pushstring(L,"numberOfAffinityDomains");
    lua_pushunsigned(L,affinity->numberOfAffinityDomains);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfSocketDomains");
    lua_pushunsigned(L,affinity->numberOfSocketDomains);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfNumaDomains");
    lua_pushunsigned(L,affinity->numberOfNumaDomains);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfProcessorsPerSocket");
    lua_pushunsigned(L,affinity->numberOfProcessorsPerSocket);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfCacheDomains");
    lua_pushunsigned(L,affinity->numberOfCacheDomains);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfCoresPerCache");
    lua_pushunsigned(L,affinity->numberOfCoresPerCache);
    lua_settable(L,-3);
    lua_pushstring(L,"numberOfProcessorsPerCache");
    lua_pushunsigned(L,affinity->numberOfProcessorsPerCache);
    lua_settable(L,-3);
    lua_pushstring(L,"domains");
    lua_newtable(L);
    for(i=0;i<affinity->numberOfAffinityDomains;i++)
    {
        lua_pushunsigned(L, i+1);
        lua_newtable(L);
        lua_pushstring(L,"tag");
        lua_pushstring(L,bdata(affinity->domains[i].tag));
        lua_settable(L,-3);
        lua_pushstring(L,"numberOfProcessors");
        lua_pushunsigned(L,affinity->domains[i].numberOfProcessors);
        lua_settable(L,-3);
        lua_pushstring(L,"numberOfCores");
        lua_pushunsigned(L,affinity->domains[i].numberOfCores);
        lua_settable(L,-3);
        lua_pushstring(L,"processorList");
        lua_newtable(L);
        for(j=0;j<affinity->domains[i].numberOfProcessors;j++)
        {
            lua_pushunsigned(L,j+1);
            lua_pushunsigned(L,affinity->domains[i].processorList[j]);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);
        lua_settable(L,-3);
    }
    lua_settable(L,-3);
    return 1;
}
Exemplo n.º 7
0
static int lua_likwid_getNumaInfo(lua_State* L)
{
    uint32_t i,j;
    if (topology_isInitialized == 0)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if ((topology_isInitialized) && (cpuinfo == NULL))
    {
        cpuinfo = get_cpuInfo();
    }
    if ((topology_isInitialized) && (cputopo == NULL))
    {
        cputopo = get_cpuTopology();
    }
    if (numa_isInitialized == 0)
    {
        if (numa_init() == 0)
        {
            numa_isInitialized = 1;
            numainfo = get_numaTopology();
        }
        else
        {
            lua_newtable(L);
            lua_pushstring(L,"numberOfNodes");
            lua_pushunsigned(L,0);
            lua_settable(L,-3);
            lua_pushstring(L,"nodes");
            lua_newtable(L);
            lua_settable(L,-3);
            return 1;
        }
    }
    if ((numa_isInitialized) && (numainfo == NULL))
    {
        numainfo = get_numaTopology();
    }
    if (affinity_isInitialized == 0)
    {
        affinity_init();
        affinity_isInitialized = 1;
        affinity = get_affinityDomains();
    }
    if ((affinity_isInitialized) && (affinity == NULL))
    {
        affinity = get_affinityDomains();
    }
    lua_newtable(L);
    lua_pushstring(L,"numberOfNodes");
    lua_pushunsigned(L,numainfo->numberOfNodes);
    lua_settable(L,-3);

    lua_pushstring(L,"nodes");
    lua_newtable(L);
    for(i=0;i<numainfo->numberOfNodes;i++)
    {
        lua_pushinteger(L, i+1);
        lua_newtable(L);
        
        lua_pushstring(L,"id");
        lua_pushunsigned(L,numainfo->nodes[i].id);
        lua_settable(L,-3);
        lua_pushstring(L,"totalMemory");
        lua_pushunsigned(L,numainfo->nodes[i].totalMemory);
        lua_settable(L,-3);
        lua_pushstring(L,"freeMemory");
        lua_pushunsigned(L,numainfo->nodes[i].freeMemory);
        lua_settable(L,-3);
        lua_pushstring(L,"numberOfProcessors");
        lua_pushunsigned(L,numainfo->nodes[i].numberOfProcessors);
        lua_settable(L,-3);
        lua_pushstring(L,"numberOfDistances");
        lua_pushunsigned(L,numainfo->nodes[i].numberOfDistances);
        lua_settable(L,-3);
        
        lua_pushstring(L,"processors");
        lua_newtable(L);
        for(j=0;j<numainfo->nodes[i].numberOfProcessors;j++)
        {
            lua_pushunsigned(L,j+1);
            lua_pushunsigned(L,numainfo->nodes[i].processors[j]);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);
        
        /*lua_pushstring(L,"processorsCompact");
        lua_newtable(L);
        for(j=0;j<numa->nodes[i].numberOfProcessors;j++)
        {
            lua_pushunsigned(L,j);
            lua_pushunsigned(L,numa->nodes[i].processorsCompact[j]);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);*/
        
        lua_pushstring(L,"distances");
        lua_newtable(L);
        for(j=0;j<numainfo->nodes[i].numberOfDistances;j++)
        {
            lua_pushinteger(L,j+1);
            lua_newtable(L);
            lua_pushinteger(L,j);
            lua_pushunsigned(L,numainfo->nodes[i].distances[j]);
            lua_settable(L,-3);
            lua_settable(L,-3);
        }
        lua_settable(L,-3);
        
        lua_settable(L,-3);
    }
    lua_settable(L,-3);
    return 1;
}
Exemplo n.º 8
0
void
allocator_init(int numVectors)
{
    allocList = (allocation*) malloc(numVectors * sizeof(allocation));
    domains = get_affinityDomains();
}
Exemplo n.º 9
0
static int cpustr_to_cpulist_physical(bstring bcpustr, int* cpulist, int length)
{
    topology_init();
    CpuTopology_t cpuid_topology = get_cpuTopology();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    bstring bdomain;
    bstring blist;
    int domainidx = -1;
    if (bstrchrp(bcpustr, ':', 0) != BSTR_ERR)
    {
        struct bstrList* strlist = bstrListCreate();
        strlist = bsplit(bcpustr, ':');
        bdomain = bstrcpy(strlist->entry[0]);
        blist = bstrcpy(strlist->entry[1]);
        bstrListDestroy(strlist);
    }
    else
    {
        bdomain = bformat("N");
        blist = bstrcpy(bcpustr);
    }
    for (int i=0; i<affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(bdomain, affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
        bdestroy(bdomain);
        bdestroy(blist);
        return 0;
    }
    struct bstrList* strlist = bstrListCreate();
    strlist = bsplit(blist, ',');
    int insert = 0;
    for (int i=0;i< strlist->qty; i++)
    {
        if (bstrchrp(strlist->entry[i], '-', 0) != BSTR_ERR)
        {
            struct bstrList* indexlist = bstrListCreate();
            indexlist = bsplit(strlist->entry[i], '-');
            if (atoi(bdata(indexlist->entry[0])) <= atoi(bdata(indexlist->entry[1])))
            {
                for (int j=atoi(bdata(indexlist->entry[0])); j<=atoi(bdata(indexlist->entry[1]));j++)
                {
                    if (cpu_in_domain(domainidx, j))
                    {
                        cpulist[insert] = j;
                        insert++;
                        if (insert == length)
                        {
                            bstrListDestroy(indexlist);
                            goto physical_done;
                        }
                    }
                    else
                    {
                        fprintf(stderr, "CPU %d not in domain %s\n", j, bdata(affinity->domains[domainidx].tag));
                    }
                }
            }
            else
            {
                for (int j=atoi(bdata(indexlist->entry[0])); j>=atoi(bdata(indexlist->entry[1]));j--)
                {
                    if (cpu_in_domain(domainidx, j))
                    {
                        cpulist[insert] = j;
                        insert++;
                        if (insert == length)
                        {
                            bstrListDestroy(indexlist);
                            goto physical_done;
                        }
                    }
                    else
                    {
                        fprintf(stderr, "CPU %d not in domain %s\n", j, bdata(affinity->domains[domainidx].tag));
                    }
                }
            }
            bstrListDestroy(indexlist);
        }
        else
        {
            int cpu = atoi(bdata(strlist->entry[i]));
            if (cpu_in_domain(domainidx, cpu))
            {
                cpulist[insert] = cpu;
                insert++;
                if (insert == length)
                {
                    goto physical_done;
                }
            }
            else
            {
                fprintf(stderr, "CPU %d not in domain %s\n", cpu, bdata(affinity->domains[domainidx].tag));
            }
        }
    }
physical_done:
    bstrListDestroy(strlist);
    bdestroy(bdomain);
    bdestroy(blist);
    return insert;
}
Exemplo n.º 10
0
static int cpustr_to_cpulist_logical(bstring bcpustr, int* cpulist, int length)
{
    topology_init();
    CpuTopology_t cpuid_topology = get_cpuTopology();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    int domainidx = -1;
    bstring bdomain;
    bstring blist;
    if (bstrchrp(bcpustr, 'L', 0) != 0)
    {
        fprintf(stderr, "Not a valid CPU expression\n");
        return 0;
    }
    struct bstrList* strlist = bstrListCreate();
    strlist = bsplit(bcpustr, ':');
    if (strlist->qty != 3)
    {
        fprintf(stderr, "ERROR: Invalid expression, should look like L:<domain>:<indexlist> or be in a cpuset\n");
        bstrListDestroy(strlist);
        return 0;
    }
    bdomain = bstrcpy(strlist->entry[1]);
    blist = bstrcpy(strlist->entry[2]);
    bstrListDestroy(strlist);
    for (int i=0; i<affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(bdomain, affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
        return 0;
    }
    int *inlist = malloc(affinity->domains[domainidx].numberOfProcessors * sizeof(int));
    if (inlist == NULL)
    {
        return -ENOMEM;
    }
    int ret = cpulist_sort(affinity->domains[domainidx].processorList, inlist, affinity->domains[domainidx].numberOfProcessors);

    strlist = bstrListCreate();
    strlist = bsplit(blist, ',');
    int insert = 0;
    for (int i=0; i< strlist->qty; i++)
    {
        if (bstrchrp(strlist->entry[i], '-', 0) != BSTR_ERR)
        {
            struct bstrList* indexlist = bstrListCreate();
            indexlist = bsplit(strlist->entry[i], '-');
            if (atoi(bdata(indexlist->entry[0])) <= atoi(bdata(indexlist->entry[1])))
            {
                for (int j=atoi(bdata(indexlist->entry[0])); j<=atoi(bdata(indexlist->entry[1]));j++)
                {
                    cpulist[insert] = inlist[j];
                    insert++;
                    if (insert == length)
                    {
                        bstrListDestroy(indexlist);
                        goto logical_done;
                    }
                }
            }
            else
            {
                for (int j=atoi(bdata(indexlist->entry[0])); j>=atoi(bdata(indexlist->entry[1]));j--)
                {
                    cpulist[insert] = inlist[j];
                    insert++;
                    if (insert == length)
                    {
                        bstrListDestroy(indexlist);
                        goto logical_done;
                    }
                }
            }
            bstrListDestroy(indexlist);
        }
        else
        {
            cpulist[insert] = inlist[atoi(bdata(strlist->entry[i]))];
            insert++;
            if (insert == length)
            {
                goto logical_done;
            }
        }
    }
logical_done:
    free(inlist);
    bstrListDestroy(strlist);
    return insert;
}
Exemplo n.º 11
0
static int cpustr_to_cpulist_expression(bstring bcpustr, int* cpulist, int length)
{
    topology_init();
    CpuTopology_t cpuid_topology = get_cpuTopology();
    affinity_init();
    AffinityDomains_t affinity = get_affinityDomains();
    bstring bdomain;
    int domainidx = -1;
    int count = 0;
    int stride = 0;
    int chunk = 0;
    if (bstrchrp(bcpustr, 'E', 0) != 0)
    {
        fprintf(stderr, "Not a valid CPU expression\n");
        return 0;
    }
    struct bstrList* strlist = bstrListCreate();
    strlist = bsplit(bcpustr, ':');
    if (strlist->qty == 3)
    {
        bdomain = bstrcpy(strlist->entry[1]);
        count = atoi(bdata(strlist->entry[2]));
        stride = 1;
        chunk = 1;
    }
    else if (strlist->qty == 5)
    {
        bdomain = bstrcpy(strlist->entry[1]);
        count = atoi(bdata(strlist->entry[2]));
        chunk = atoi(bdata(strlist->entry[3]));
        stride = atoi(bdata(strlist->entry[4]));
    }
    for (int i=0; i<affinity->numberOfAffinityDomains; i++)
    {
        if (bstrcmp(bdomain, affinity->domains[i].tag) == 0)
        {
            domainidx = i;
            break;
        }
    }
    if (domainidx < 0)
    {
        fprintf(stderr, "Cannot find domain %s\n", bdata(bdomain));
        bstrListDestroy(strlist);
        return 0;
    }
    int offset = 0;
    int insert = 0;
    for (int i=0;i<count;i++)
    {
        for (int j=0;j<chunk && offset+j<affinity->domains[domainidx].numberOfProcessors;j++)
        {
            cpulist[insert] = affinity->domains[domainidx].processorList[offset + j];
            insert++;
            if (insert == length)
                goto expression_done;
        }
        offset += stride;
        if (offset >= affinity->domains[domainidx].numberOfProcessors)
        {
            offset = 0;
        }
        if (insert >= count)
            goto expression_done;
    }
    bstrListDestroy(strlist);
    return 0;
expression_done:
    bstrListDestroy(strlist);
    return insert;
}
Exemplo n.º 12
0
int main(int argc, char** argv)
{
    uint64_t iter = 100;
    uint32_t i;
    uint32_t j;
    int globalNumberOfThreads = 0;
    int optPrintDomains = 0;
    int c;
    ThreadUserData myData;
    bstring testcase = bfromcstr("none");
    uint64_t numberOfWorkgroups = 0;
    int tmp = 0;
    double time;
    double cycPerUp = 0.0;
    const TestCase* test = NULL;
    uint64_t realSize = 0;
    uint64_t realIter = 0;
    uint64_t maxCycles = 0;
    uint64_t minCycles = UINT64_MAX;
    uint64_t cyclesClock = 0;
    uint64_t demandIter = 0;
    TimerData itertime;
    Workgroup* currentWorkgroup = NULL;
    Workgroup* groups = NULL;
    uint32_t min_runtime = 1; /* 1s */
    bstring HLINE = bfromcstr("");
    binsertch(HLINE, 0, 80, '-');
    binsertch(HLINE, 80, 1, '\n');
    int (*ownprintf)(const char *format, ...);
    ownprintf = &printf;

    /* Handling of command line options */
    if (argc ==  1)
    {
        HELP_MSG;
        exit(EXIT_SUCCESS);
    }

    while ((c = getopt (argc, argv, "w:t:s:l:aphvi:")) != -1) {
        switch (c)
        {
            case 'h':
                HELP_MSG;
                exit (EXIT_SUCCESS);
            case 'v':
                VERSION_MSG;
                exit (EXIT_SUCCESS);
            case 'a':
                ownprintf(TESTS"\n");
                exit (EXIT_SUCCESS);
            case 'w':
                numberOfWorkgroups++;
                break;
            case 's':
                min_runtime = atoi(optarg);
                break;
            case 'i':
                demandIter = strtoul(optarg, NULL, 10);
                if (demandIter <= 0)
                {
                    fprintf (stderr, "Error: Iterations must be greater than 0\n");
                    return EXIT_FAILURE;
                }
                break;
            case 'l':
                bdestroy(testcase);
                testcase = bfromcstr(optarg);
                for (i=0; i<NUMKERNELS; i++)
                {
                    if (biseqcstr(testcase, kernels[i].name))
                    {
                        test = kernels+i;
                        break;
                    }
                }

                if (test == NULL)
                {
                    fprintf (stderr, "Error: Unknown test case %s\n",optarg);
                    return EXIT_FAILURE;
                }
                else
                {
                    ownprintf("Name: %s\n",test->name);
                    ownprintf("Number of streams: %d\n",test->streams);
                    ownprintf("Loop stride: %d\n",test->stride);
                    ownprintf("Flops: %d\n",test->flops);
                    ownprintf("Bytes: %d\n",test->bytes);
                    switch (test->type)
                    {
                        case INT:
                            ownprintf("Data Type: Integer\n");
                            break;
                        case SINGLE:
                            ownprintf("Data Type: Single precision float\n");
                            break;
                        case DOUBLE:
                            ownprintf("Data Type: Double precision float\n");
                            break;
                    }
                    if (test->loads >= 0)
                    {
                        ownprintf("Load Ops: %d\n",test->loads);
                    }
                    if (test->stores >= 0)
                    {
                        ownprintf("Store Ops: %d\n",test->stores);
                    }
                    if (test->branches >= 0)
                    {
                        ownprintf("Branches: %d\n",test->branches);
                    }
                    if (test->instr_const >= 0)
                    {
                        ownprintf("Constant instructions: %d\n",test->instr_const);
                    }
                    if (test->instr_loop >= 0)
                    {
                        ownprintf("Loop instructions: %d\n",test->instr_loop);
                    }
                }
                bdestroy(testcase);
                exit (EXIT_SUCCESS);

                break;
            case 'p':
                optPrintDomains = 1;
                break;
            case 'g':
                numberOfWorkgroups = LLU_CAST atol(optarg);

                tmp = numberOfWorkgroups;

                break;
            case 't':
                bdestroy(testcase);
                testcase = bfromcstr(optarg);

                for (i=0; i<NUMKERNELS; i++)
                {
                    if (biseqcstr(testcase, kernels[i].name))
                    {
                        test = kernels+i;
                        break;
                    }
                }

                if (test == NULL)
                {
                    fprintf (stderr, "Error: Unknown test case %s\n",optarg);
                    return EXIT_FAILURE;
                }
                bdestroy(testcase);
                break;
            case '?':
                if (isprint (optopt))
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                else
                    fprintf (stderr,
                            "Unknown option character `\\x%x'.\n",
                            optopt);
                return EXIT_FAILURE;
            default:
                HELP_MSG;
        }
    }
    if ((numberOfWorkgroups == 0) && (!optPrintDomains))
    {
        fprintf(stderr, "Error: At least one workgroup (-w) must be set on commandline\n");
        exit (EXIT_FAILURE);
    }

    if (topology_init() != EXIT_SUCCESS)
    {
        fprintf(stderr, "Error: Unsupported processor!\n");
        exit(EXIT_FAILURE);
    }

    if ((test == NULL) && (!optPrintDomains))
    {
        fprintf(stderr, "Unknown test case. Please check likwid-bench -a for available tests\n");
        fprintf(stderr, "and select one using the -t commandline option\n");
        exit(EXIT_FAILURE);
    }

    numa_init();
    affinity_init();
    timer_init();

    if (optPrintDomains)
    {
        bdestroy(testcase);
        AffinityDomains_t affinity = get_affinityDomains();
        ownprintf("Number of Domains %d\n",affinity->numberOfAffinityDomains);
        for (i=0; i < affinity->numberOfAffinityDomains; i++ )
        {
            ownprintf("Domain %d:\n",i);
            ownprintf("\tTag %s:",bdata(affinity->domains[i].tag));

            for ( uint32_t j=0; j < affinity->domains[i].numberOfProcessors; j++ )
            {
                ownprintf(" %d",affinity->domains[i].processorList[j]);
            }
            ownprintf("\n");
        }
        exit (EXIT_SUCCESS);
    }

    allocator_init(numberOfWorkgroups * MAX_STREAMS);
    groups = (Workgroup*) malloc(numberOfWorkgroups*sizeof(Workgroup));
    tmp = 0;

    optind = 0;
    while ((c = getopt (argc, argv, "w:t:s:l:i:aphv")) != -1)
    {
        switch (c)
        {
            case 'w':
                currentWorkgroup = groups+tmp;
                bstring groupstr = bfromcstr(optarg);
                i = bstr_to_workgroup(currentWorkgroup, groupstr, test->type, test->streams);
                bdestroy(groupstr);
                if (i == 0)
                {
                    for (i=0; i<  test->streams; i++)
                    {
                        if (currentWorkgroup->streams[i].offset%test->stride)
                        {
                            fprintf (stderr, "Error: Stream %d: offset is not a multiple of stride!\n",i);
                            return EXIT_FAILURE;
                        }
                        allocator_allocateVector(&(currentWorkgroup->streams[i].ptr),
                                PAGE_ALIGNMENT,
                                currentWorkgroup->size,
                                currentWorkgroup->streams[i].offset,
                                test->type,
                                currentWorkgroup->streams[i].domain);
                    }
                    tmp++;
                }
                else
                {
                    exit(EXIT_FAILURE);
                }
                break;
            default:
                continue;
                break;
        }
    }

    /* :WARNING:05/04/2010 08:58:05 AM:jt: At the moment the thread
     * module only allows equally sized thread groups*/
    for (i=0; i<numberOfWorkgroups; i++)
    {
        globalNumberOfThreads += groups[i].numberOfThreads;
    }

    ownprintf(bdata(HLINE));
    ownprintf("LIKWID MICRO BENCHMARK\n");
    ownprintf("Test: %s\n",test->name);
    ownprintf(bdata(HLINE));
    ownprintf("Using %" PRIu64 " work groups\n",numberOfWorkgroups);
    ownprintf("Using %d threads\n",globalNumberOfThreads);
    ownprintf(bdata(HLINE));


    threads_init(globalNumberOfThreads);
    threads_createGroups(numberOfWorkgroups);

    /* we configure global barriers only */
    barrier_init(1);
    barrier_registerGroup(globalNumberOfThreads);
    cyclesClock = timer_getCycleClock();

#ifdef LIKWID_PERFMON
    if (getenv("LIKWID_FILEPATH") != NULL)
    {
        ownprintf("Using Likwid Marker API\n");
    }
    LIKWID_MARKER_INIT;
    ownprintf(bdata(HLINE));
#endif


    /* initialize data structures for threads */
    for (i=0; i<numberOfWorkgroups; i++)
    {
        myData.iter = iter;
        if (demandIter > 0)
        {
            myData.iter = demandIter;
        }
        myData.min_runtime = min_runtime;
        myData.size = groups[i].size;
        myData.test = test;
        myData.cycles = 0;
        myData.numberOfThreads = groups[i].numberOfThreads;
        myData.processors = (int*) malloc(myData.numberOfThreads * sizeof(int));
        myData.streams = (void**) malloc(test->streams * sizeof(void*));

        for (j=0; j<groups[i].numberOfThreads; j++)
        {
            myData.processors[j] = groups[i].processorIds[j];
        }

        for (j=0; j<  test->streams; j++)
        {
            myData.streams[j] = groups[i].streams[j].ptr;
        }

        threads_registerDataGroup(i, &myData, copyThreadData);

        free(myData.processors);
        free(myData.streams);
    }

    if (demandIter == 0)
    {
        getIterSingle((void*) &threads_data[0]);
        for (i=0; i<numberOfWorkgroups; i++)
        {
            iter = threads_updateIterations(i, demandIter);
        }
    }
#ifdef DEBUG_LIKWID
    else
    {
        ownprintf("Using manually selected iterations per thread\n");
    }
#endif

    threads_create(runTest);
    threads_join();

    for (int i=0; i<globalNumberOfThreads; i++)
    {
        realSize += threads_data[i].data.size;
        realIter += threads_data[i].data.iter;
        if (threads_data[i].cycles > maxCycles)
        {
            maxCycles = threads_data[i].cycles;
        }
        if (threads_data[i].cycles < minCycles)
        {
            minCycles = threads_data[i].cycles;
        }
    }



    time = (double) maxCycles / (double) cyclesClock;
    ownprintf(bdata(HLINE));
    ownprintf("Cycles:\t\t\t%" PRIu64 "\n", maxCycles);
    ownprintf("CPU Clock:\t\t%" PRIu64 "\n", timer_getCpuClock());
    ownprintf("Cycle Clock:\t\t%" PRIu64 "\n", cyclesClock);
    ownprintf("Time:\t\t\t%e sec\n", time);
    ownprintf("Iterations:\t\t%" PRIu64 "\n", realIter);
    ownprintf("Iterations per thread:\t%" PRIu64 "\n",threads_data[0].data.iter);
    ownprintf("Inner loop executions:\t%.0f\n", ((double)realSize)/((double)test->stride));
    ownprintf("Size:\t\t\t%" PRIu64 "\n",  realSize*test->bytes );
    ownprintf("Size per thread:\t%" PRIu64 "\n", threads_data[0].data.size*test->bytes);
    ownprintf("Number of Flops:\t%" PRIu64 "\n", (threads_data[0].data.iter * realSize *  test->flops));
    ownprintf("MFlops/s:\t\t%.2f\n",
            1.0E-06 * ((double) threads_data[0].data.iter * realSize *  test->flops/  time));
    
    ownprintf("Data volume (Byte):\t%llu\n", LLU_CAST (threads_data[0].data.iter * realSize *  test->bytes));
    ownprintf("MByte/s:\t\t%.2f\n",
            1.0E-06 * ( (double) threads_data[0].data.iter * realSize *  test->bytes/ time));

    cycPerUp = ((double) maxCycles / (double) (threads_data[0].data.iter * realSize));
    ownprintf("Cycles per update:\t%f\n", cycPerUp);

    switch ( test->type )
    {
        case INT:
        case SINGLE:
            ownprintf("Cycles per cacheline:\t%f\n", (16.0 * cycPerUp));
            break;
        case DOUBLE:
            ownprintf("Cycles per cacheline:\t%f\n", (8.0 * cycPerUp));
            break;
    }
    ownprintf("Loads per update:\t%ld\n", test->loads );
    ownprintf("Stores per update:\t%ld\n", test->stores );
    if ((test->loads > 0) && (test->stores > 0))
    {
        ownprintf("Load/store ratio:\t%.2f\n", ((double)test->loads)/((double)test->stores) );
    }
    if ((test->instr_loop > 0) && (test->instr_const > 0))
    {
        ownprintf("Instructions:\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->instr_loop*threads_data[0].data.iter + test->instr_const );
    }
    if (test->uops > 0)
    {
        ownprintf("UOPs:\t\t\t%" PRIu64 "\n", LLU_CAST ((double)realSize/test->stride)*test->uops*threads_data[0].data.iter);
    }

    ownprintf(bdata(HLINE));
    threads_destroy(numberOfWorkgroups, test->streams);
    allocator_finalize();
    workgroups_destroy(&groups, numberOfWorkgroups, test->streams);

#ifdef LIKWID_PERFMON
    if (getenv("LIKWID_FILEPATH") != NULL)
    {
        ownprintf("Writing Likwid Marker API results to file %s\n", getenv("LIKWID_FILEPATH"));
    }
    LIKWID_MARKER_CLOSE;
#endif

    bdestroy(HLINE);
    return EXIT_SUCCESS;
}
Exemplo n.º 13
0
void bstr_to_workgroup(Workgroup* group, const_bstring str, DataType type, int numberOfStreams)
{
    uint32_t i;
    int parseStreams = 0;
    bstring threadInfo;
    bstring streams= bformat("0");
    struct bstrList* tokens;
    struct bstrList* subtokens;
    AffinityDomains_t domains;
    AffinityDomain* domain = NULL;

    /* split the workgroup into the thread and the streams part */
    tokens = bsplit(str,'-');

    if (tokens->qty == 2)
    {
        threadInfo = bstrcpy(tokens->entry[0]);
        streams = bstrcpy(tokens->entry[1]);
        parseStreams = 1;
    }
    else if (tokens->qty == 1)
    {
        threadInfo = bstrcpy(tokens->entry[0]);
    }
    else
    {
        fprintf(stderr, "Error in parsing workgroup string\n");
    }

    bstrListDestroy (tokens);
    tokens = bsplit(threadInfo,':');

    if (tokens->qty == 5)
    {
        uint32_t maxNumThreads;
        int chunksize;
        int stride;
        int counter;
        int currentId = 0;
        int startId = 0;

        domains = get_affinityDomains();
        for (i = 0; i < domains->numberOfAffinityDomains; i++)
        {
            if (bstrcmp(domains->domains[i].tag, tokens->entry[0]) == BSTR_OK)
            {
                domain = &(domains->domains[i]);
                break;
            }
        }

        if (domain == NULL)
        {
            fprintf(stderr, "Error: Domain %s not available on current machine.\nTry likwid-bench -p for supported domains.",
                bdata(tokens->entry[0]));
            exit(EXIT_FAILURE);
        }

        group->size = bstr_to_doubleSize(tokens->entry[1], type);
        group->numberOfThreads = str2int(bdata(tokens->entry[2]));
        chunksize = str2int(bdata(tokens->entry[3]));
        stride = str2int(bdata(tokens->entry[4]));
        maxNumThreads = ceil((double)domain->numberOfProcessors / stride) * chunksize;

        if (group->numberOfThreads > maxNumThreads)
        {
            fprintf(stderr, "Warning: More threads (%d) requested than CPUs in domain %s fulfilling expression (%d).\n",
                    group->numberOfThreads, bdata(tokens->entry[0]), maxNumThreads);
        }

        group->processorIds = (int*) malloc(group->numberOfThreads * sizeof(int));

        counter = chunksize;

        counter = 0;
        for (int j=0; j<group->numberOfThreads; j+=chunksize)
        {
            for(i=0;i<chunksize && j+i<group->numberOfThreads ;i++)
            {
                group->processorIds[startId++] = domain->processorList[counter+i];
            }
            counter += stride;
            if (counter >= domain->numberOfProcessors)
            {
                counter = counter-domain->numberOfProcessors;
            }
        }
    }
    else if (tokens->qty == 3)
    {
        domains = get_affinityDomains();
        for (i = 0; i < domains->numberOfAffinityDomains; i++)
        {
            if (bstrcmp(domains->domains[i].tag, tokens->entry[0]) == BSTR_OK)
            {
                domain = &(domains->domains[i]);
                break;
            }
        }

        if (domain == NULL)
        {
            fprintf(stderr, "Error: Domain %s not available on current machine.\nTry likwid-bench -p for supported domains.",
                    bdata(tokens->entry[0]));
            exit(EXIT_FAILURE);
        }

        group->size = bstr_to_doubleSize(tokens->entry[1], type);
        group->numberOfThreads = str2int(bdata(tokens->entry[2]));

        if (group->numberOfThreads > domain->numberOfProcessors)
        {
            fprintf(stderr, "Warning: More threads (%d) requested than CPUs in domain %s (%d).\n",
                    group->numberOfThreads, bdata(tokens->entry[0]), domain->numberOfProcessors);
        }

        group->processorIds = (int*) malloc(group->numberOfThreads * sizeof(int));

        for (i=0; i<group->numberOfThreads; i++)
        {
            group->processorIds[i] = domain->processorList[i % domain->numberOfProcessors];
        }
    }
    else if (tokens->qty == 2)
    {
        domains = get_affinityDomains();
        for (i = 0; i < domains->numberOfAffinityDomains; i++)
        {
            if (bstrcmp(domains->domains[i].tag, tokens->entry[0]) == BSTR_OK)
            {
                domain = &(domains->domains[i]);
                break;
            }
        }

        if (domain == NULL)
        {
            fprintf(stderr, "Error: Domain %s not available on current machine.\nTry likwid-bench -p for supported domains.",
                            bdata(tokens->entry[0]));
            exit(EXIT_FAILURE);
        }

        group->size = bstr_to_doubleSize(tokens->entry[1], type);
        group->numberOfThreads = domain->numberOfProcessors;
        group->processorIds = (int*) malloc(group->numberOfThreads * sizeof(int));

        for (i=0; i<group->numberOfThreads; i++)
        {
            group->processorIds[i] = domain->processorList[i];
        }
    }
    else
    {
        fprintf(stderr, "Error in parsing workgroup string\n");
    }

    bstrListDestroy(tokens);

    /* parse stream list */
    if (parseStreams)
    {
        tokens = bsplit(streams,',');

        if (tokens->qty < numberOfStreams)
        {
            fprintf(stderr, "Error: Testcase requires at least %d streams\n", numberOfStreams);
        }

        group->streams = (Stream*) malloc(numberOfStreams * sizeof(Stream));

        for (i=0;i<(uint32_t) tokens->qty;i++)
        {
            subtokens = bsplit(tokens->entry[i],':');

            if ( subtokens->qty == 3 )
            {
                int index = str2int(bdata(subtokens->entry[0]));
                if (index >= numberOfStreams)
                {
                    fprintf(stderr, "Error: Stream index %d out of range\n",index);
                }
                group->streams[index].domain = bstrcpy(subtokens->entry[1]);
                group->streams[index].offset = str2int(bdata(subtokens->entry[2]));
            }
            else if ( subtokens->qty == 2 )
            {
                int index = str2int(bdata(subtokens->entry[0]));
                if (index >= numberOfStreams)
                {
                    fprintf(stderr, "Error: Stream index %d out of range\n",index);
                }
                group->streams[index].domain = bstrcpy(subtokens->entry[1]);
                group->streams[index].offset = 0;
            }
            else
            {
                fprintf(stderr, "Error: Cannot parse stream placement defintition in %s\n", bdata(str));
            }

            bstrListDestroy(subtokens);
        }

        bstrListDestroy(tokens);
    }
    else
    {
        group->streams = (Stream*) malloc(numberOfStreams * sizeof(Stream));

        for (i=0; i< (uint32_t)numberOfStreams; i++)
        {
            group->streams[i].domain = domain->tag;
            group->streams[i].offset = 0;
        }
    }

    group->size /= numberOfStreams;
    return;
}