Пример #1
0
static void
cpu_startup(void *dummy)
{

	/*
	 * Initialise the decrementer-based clock.
	 */
	decr_init();

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	cpu_setup(PCPU_GET(cpuid));

#ifdef PERFMON
	perfmon_init();
#endif
	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)physmem),
	    ptoa((uintmax_t)physmem) / 1048576);
	realmem = physmem;

	if (bootverbose)
		printf("available KVA = %zu (%zu MB)\n",
		    virtual_end - virtual_avail,
		    (virtual_end - virtual_avail) / 1048576);

	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (bootverbose) {
		int indx;

		printf("Physical memory chunk(s):\n");
		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
			vm_paddr_t size1 =
			    phys_avail[indx + 1] - phys_avail[indx];

			#ifdef __powerpc64__
			printf("0x%016jx - 0x%016jx, %jd bytes (%jd pages)\n",
			#else
			printf("0x%09jx - 0x%09jx, %ju bytes (%ju pages)\n",
			#endif
			    (uintmax_t)phys_avail[indx],
			    (uintmax_t)phys_avail[indx + 1] - 1,
			    (uintmax_t)size1, (uintmax_t)size1 / PAGE_SIZE);
		}
	}

	vm_ksubmap_init(&kmi);

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_cnt.v_free_count),
	    ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576);

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
	vm_pager_bufferinit();
}
Пример #2
0
int test_perfmonstart()
{
    CpuInfo_t cpuinfo;
    int group1, group2;
    int cpu = 0;
    topology_init();
    cpuinfo = get_cpuInfo();
    if (cpuinfo->isIntel == 0)
    {
        topology_finalize();
        return 1;
    }
    int ret = perfmon_init(1, &cpu);
    if (ret != 0)
        goto fail;
    ret = perfmon_addEventSet(eventset_ok);
    if (ret != 0)
        goto fail;
    group1 = ret;
    ret = perfmon_setupCounters(group1);
    if (ret != 0)
        goto fail;
    if (perfmon_getIdOfActiveGroup() != group1)
        goto fail;
    ret = perfmon_startCounters();
    if (ret != 0)
        goto fail;
    perfmon_finalize();
    topology_finalize();
    return 1;
fail:
    perfmon_finalize();
    topology_finalize();
    return 0;
}
Пример #3
0
static void opd_26_init(void)
{
	size_t i;
	size_t opd_buf_size;

	opd_create_vmlinux(vmlinux, kernel_range);

	opd_buf_size = opd_read_fs_int("/dev/oprofile/", "buffer_size", 1);
	kernel_pointer_size = opd_read_fs_int("/dev/oprofile/", "pointer_size", 1);

	s_buf_bytesize = opd_buf_size * kernel_pointer_size;

	sbuf = xmalloc(s_buf_bytesize);

	opd_reread_module_info();

	for (i = 0; i < OPD_MAX_STATS; i++)
		opd_stats[i] = 0;

	perfmon_init();

	cookie_init();
	sfile_init();
	anon_init();

	/* must be /after/ perfmon_init() at least */
	if (atexit(clean_exit)) {
		perfmon_exit();
		perror("oprofiled: couldn't set exit cleanup: ");
		exit(EXIT_FAILURE);
	}
}
Пример #4
0
int test_perfmonstop_noadd()
{
    CpuInfo_t cpuinfo;
    int cpu = 0;
    int group;
    topology_init();
    cpuinfo = get_cpuInfo();
    if (cpuinfo->isIntel == 0)
    {
        topology_finalize();
        return 1;
    }
    int ret = perfmon_init(1, &cpu);
    if (ret != 0)
        goto fail;
    ret = perfmon_stopCounters();
    if (ret == 0)
        goto fail;
    perfmon_finalize();
    topology_finalize();
    return 1;
fail:
    perfmon_finalize();
    topology_finalize();
    return 0;
}
Пример #5
0
int test_perfmonstop_nostart()
{
    CpuInfo_t cpuinfo;
    int cpu = 0;
    int group;
    topology_init();
    cpuinfo = get_cpuInfo();
    if (cpuinfo->isIntel == 0)
    {
        topology_finalize();
        return 1;
    }
    int ret = perfmon_init(1, &cpu);
    if (ret != 0)
        goto fail;
    ret = perfmon_addEventSet(eventset_ok);
    if (ret != 0)
        goto fail;
    group = ret;
    ret = perfmon_setupCounters(group);
    if (ret != 0)
        goto fail;
    ret = perfmon_stopCounters();
    if (ret == 0)
        goto fail;
    perfmon_finalize();
    topology_finalize();
    return 1;
fail:
    perfmon_finalize();
    topology_finalize();
    return 0;
}
Пример #6
0
int test_perfmonresult_noadd()
{
    CpuInfo_t cpuinfo;
    int cpu = 0;
    int group;
    topology_init();
    cpuinfo = get_cpuInfo();
    if (cpuinfo->isIntel == 0)
    {
        topology_finalize();
        return 1;
    }
    int ret = perfmon_init(1, &cpu);
    if (ret != 0)
        goto fail;
    double result = perfmon_getResult(0,0,0);
    if (result != 0)
        goto fail;
    perfmon_finalize();
    topology_finalize();
    return 1;
fail:
    perfmon_finalize();
    topology_finalize();
    return 0;
}
Пример #7
0
int main(int argc, char* argv[])
{
    int i;
    int* cpus;
    int gid;
    double result = 0.0;

    // Load the topology module and print some values.
    topology_init();
    // CpuInfo_t contains global information like name, CPU family, ...
    CpuInfo_t info = get_cpuInfo();
    // CpuTopology_t contains information about the topology of the CPUs.
    CpuTopology_t topo = get_cpuTopology();
    printf("Likwid example on a %s with %d CPUs\n", info->name, topo->numHWThreads);

    cpus = malloc(topo->numHWThreads * sizeof(int));
    if (!cpus)
        return 1;

    for (i=0;i<topo->numHWThreads;i++)
    {
        cpus[i] = topo->threadPool[i].apicId;
    }

    // Must be called before perfmon_init() but only if you want to use another
    // access mode as the pre-configured one. For direct access (0) you have to
    // be root.
    //accessClient_setaccessmode(0);

    // Initialize the perfmon module.
    perfmon_init(topo->numHWThreads, cpus);

    // Add eventset string to the perfmon module.
    gid = perfmon_addEventSet(EVENTSET);

    // Setup the eventset identified by group ID (gid).
    perfmon_setupCounters(gid);
    // Start all counters in the previously set up event set.
    perfmon_startCounters();
    // Perform something
    sleep(2);
    // Stop all counters in the previously started event set.
    perfmon_stopCounters();


    // Print the result of every thread/CPU.
    for (i = 0;i < topo->numHWThreads; i++)
    {
        result = perfmon_getResult(gid, 0, i);
        printf("Measurement result for event set %s at CPU %d: %f\n", EVENTSET, cpus[i], result);
    }

    // Uninitialize the perfmon module.
    perfmon_finalize();
    // Uninitialize the topology module.
    topology_finalize();
    return 0;
}
Пример #8
0
static int lua_likwid_init(lua_State* L)
{
    int ret;
    int nrThreads = luaL_checknumber(L,1);
    luaL_argcheck(L, nrThreads > 0, 1, "CPU count must be greater than 0");
    int cpus[nrThreads];
    if (!lua_istable(L, -1)) {
      lua_pushstring(L,"No table given as second argument");
      lua_error(L);
    }
    for (ret = 1; ret<=nrThreads; ret++)
    {
        lua_rawgeti(L,-1,ret);
        cpus[ret-1] = lua_tounsigned(L,-1);
        lua_pop(L,1);
    }
    if (topology_isInitialized == 0)
    {
        topology_init();
        topology_isInitialized = 1;
        cpuinfo = get_cpuInfo();
        cputopo = get_cpuTopology();
    }
    if ((topology_isInitialized) && (cpuinfo == NULL))
    {
        cpuinfo = get_cpuInfo();
    }
    if ((topology_isInitialized) && (cputopo == NULL))
    {
        cputopo = get_cpuTopology();
    }
    if (numa_isInitialized == 0)
    {
        numa_init();
        numa_isInitialized = 1;
        numainfo = get_numaTopology();
    }
    if ((numa_isInitialized) && (numainfo == NULL))
    {
        numainfo = get_numaTopology();
    }
    if (perfmon_isInitialized == 0)
    {
        ret = perfmon_init(nrThreads, &(cpus[0]));
        if (ret != 0)
        {
            lua_pushstring(L,"Cannot initialize likwid perfmon");
            lua_error(L);
            return 1;
        }
        perfmon_isInitialized = 1;
        timer_isInitialized = 1;
        lua_pushinteger(L,ret);
    }
    return 1;
}
Пример #9
0
void arch_init()
{
	pci_init();
#ifdef __CONFIG_ENABLE_MPTABLES__
	mptables_parse();
	ioapic_init(); // MUST BE AFTER PCI/ISA INIT!
	// TODO: move these back to regular init.  requires fixing the 
	// __CONFIG_NETWORKING__ inits to not need multiple cores running.
#endif
	// this returns when all other cores are done and ready to receive IPIs
	#ifdef __CONFIG_SINGLE_CORE__
		smp_percpu_init();
	#else
		smp_boot();
	#endif
	proc_init();

	/* EXPERIMENTAL NETWORK FUNCTIONALITY
	 * To enable, define __CONFIG_NETWORKING__ in your Makelocal
	 * If enabled, will load the rl8168 driver (if device exists)
	 * and will a boot into userland matrix, so remote syscalls can be performed.
 	 * If in simulation, will do some debugging information with the ne2k device
	 *
	 * Note: If you use this, you should also define the mac address of the 
	 * teathered machine via USER_MAC_ADDRESS in Makelocal.
	 *
	 * Additionally, you should have a look at the syscall server in the tools directory
	 */
	#ifdef __CONFIG_NETWORKING__
	#ifdef __CONFIG_SINGLE_CORE__
		warn("You currently can't have networking if you boot into single core mode!!\n");
	#else
		rl8168_init();		
		ne2k_init();
		e1000_init();
	#endif // __CONFIG_SINGLE_CORE__
	#endif // __CONFIG_NETWORKING__

	perfmon_init();
		
#ifdef __CONFIG_MONITOR_ON_INT__
	/* Handler to read a char from the interrupt source and call the monitor.
	 * Need to read the character so the device will send another interrupt.
	 * Note this will read from both the serial and the keyboard, and throw away
	 * the result.  We condition, since we don't want to trigger on a keyboard
	 * up interrupt */
	void mon_int(struct trapframe *tf, void *data)
	{
		// Enable interrupts here so that we can receive 
		// other interrupts (e.g. from the NIC)
		enable_irq();
		if (cons_getc())
			monitor(0);
	}
Пример #10
0
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
	int ret = -ENODEV;

#ifdef CONFIG_PERFMON
	
	ret = perfmon_init(ops);
#endif
	ops->backtrace = ia64_backtrace;

	return ret;
}
Пример #11
0
int test_perfmoninit_faulty()
{
    int cpu = 0;
    int ret = perfmon_init(1, &cpu);
    if (ret != 0)
        goto fail;
    perfmon_finalize();
    return 0;
fail:
    perfmon_finalize();
    return 1;
}
Пример #12
0
int test_perfmoninit()
{
    int cpu = 0;
    int i;
    topology_init();
    affinity_init();
    for(i=0;i<10;i++)
    {
        perfmon_init(1, &cpu);
        perfmon_finalize();
    }
    affinity_finalize();
    topology_finalize();
    return 1;
}
Пример #13
0
static void opd_26_init(void)
{
	size_t i;
	size_t opd_buf_size;
	unsigned long long start_time = 0ULL;
	struct timeval tv;

	opd_create_vmlinux(vmlinux, kernel_range);
	opd_create_xen(xenimage, xen_range);

	opd_buf_size = opd_read_fs_int("/dev/oprofile/", "buffer_size", 1);
	kernel_pointer_size = opd_read_fs_int("/dev/oprofile/", "pointer_size", 1);

	s_buf_bytesize = opd_buf_size * kernel_pointer_size;

	sbuf = xmalloc(s_buf_bytesize);

	opd_reread_module_info();

	for (i = 0; i < OPD_MAX_STATS; i++)
		opd_stats[i] = 0;

	perfmon_init();

	cookie_init();
	sfile_init();
	anon_init();

	/* must be /after/ perfmon_init() at least */
	if (atexit(clean_exit)) {
		perfmon_exit();
		perror("oprofiled: couldn't set exit cleanup: ");
		exit(EXIT_FAILURE);
	}

	/* trigger kernel module setup before returning control to opcontrol */
	opd_open_files();
	gettimeofday(&tv, NULL);
	start_time = 0ULL;
	start_time = tv.tv_sec;
	sprintf(start_time_str, "%llu", start_time);
		  
}
Пример #14
0
int test_perfmonresult()
{
    CpuInfo_t cpuinfo;
    int cpu = 0;
    int group;
    topology_init();
    cpuinfo = get_cpuInfo();
    if (cpuinfo->isIntel == 0)
    {
        topology_finalize();
        return 1;
    }
    int ret = perfmon_init(1, &cpu);
    if (ret != 0)
        goto fail;
    ret = perfmon_addEventSet(eventset_ok);
    if (ret != 0)
        goto fail;
    group = ret;
    ret = perfmon_setupCounters(group);
    if (ret != 0)
        goto fail;

    ret = perfmon_startCounters();
    if (ret != 0)
        goto fail;
    sleep(1);
    ret = perfmon_stopCounters();
    if (ret != 0)
        goto fail;
    if ((perfmon_getResult(group,0,0) == 0)||(perfmon_getResult(group,1,0) == 0))
        goto fail;
    if (perfmon_getTimeOfGroup(group) == 0)
        goto fail;
    perfmon_finalize();
    topology_finalize();
    return 1;
fail:
    perfmon_finalize();
    topology_finalize();
    return 0;
}
Пример #15
0
int test_perfmoninit_valid()
{
    int cpu = 0;
    topology_init();
    affinity_init();
    int ret = perfmon_init(1, &cpu);
    if (ret != 0)
        goto fail;
    if (perfmon_getNumberOfGroups() != 0)
        goto fail;
    if (perfmon_getNumberOfThreads() != 1)
        goto fail;
    perfmon_finalize();
    affinity_finalize();
    topology_finalize();
    return 1;
fail:
    perfmon_finalize();
    affinity_finalize();
    topology_finalize();
    return 0;
}
Пример #16
0
int main(int argc, char* argv[])
{
    int i, j;
    int err;
    int* cpus;
    int gid;
    double result = 0.0;
    char estr[] = "INSTR_RETIRED_ANY:FIXC0,CPU_CLK_UNHALTED_CORE:FIXC1,CPU_CLK_UNHALTED_REF:FIXC2,TEMP_CORE:TMP0";
    // Load the topology module and print some values.
    err = topology_init();
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's topology module\n");
        return 1;
    }
    // CpuInfo_t contains global information like name, CPU family, ...
    CpuInfo_t info = get_cpuInfo();
    // CpuTopology_t contains information about the topology of the CPUs.
    CpuTopology_t topo = get_cpuTopology();
    // Create affinity domains. Commonly only needed when reading Uncore counters
    //affinity_init();

    printf("Likwid example on a %s with %d CPUs\n", info->name, topo->numHWThreads);

    cpus = (int*)malloc(topo->numHWThreads * sizeof(int));
    if (!cpus)
        return 1;

    for (i=0;i<topo->numHWThreads;i++)
    {
        cpus[i] = topo->threadPool[i].apicId;
    }

    // Must be called before perfmon_init() but only if you want to use another
    // access mode as the pre-configured one. For direct access (0) you have to
    // be root.
    //accessClient_setaccessmode(0);

    // Initialize the perfmon module.
    err = perfmon_init(topo->numHWThreads, cpus);
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's performance monitoring module\n");
        topology_finalize();
        return 1;
    }

    // Add eventset string to the perfmon module.
    gid = perfmon_addEventSet(estr);
    if (gid < 0)
    {
        printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }

    // Setup the eventset identified by group ID (gid).
    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }
    // Start all counters in the previously set up event set.
    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }
    // Perform something
    sleep(2);
    // Stop all counters in the previously started event set.
    err = perfmon_stopCounters();
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }


    // Print the result of every thread/CPU for all events in estr.
    char* ptr = strtok(estr,",");
    j = 0;
    while (ptr != NULL)
    {
        for (i = 0;i < topo->numHWThreads; i++)
        {
            result = perfmon_getResult(gid, j, cpus[i]);
            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
        }
        ptr = strtok(NULL,",");
        j++;
    }

    // Uninitialize the perfmon module.
    perfmon_finalize();
    // Uninitialize the topology module.
    topology_finalize();
    return 0;
}
Пример #17
0
int main (int argc, char** argv)
{
    int socket_fd = -1;
    int optInfo = 0;
    int optClock = 0;
    int optStethoscope = 0;
    int optSockets = 0;
    double runtime;
    int hasDRAM = 0;
    int c;
    bstring argString;
    bstring eventString = bfromcstr("CLOCK");
    int numSockets=1;
    int numThreads=0;
    int threadsSockets[MAX_NUM_NODES*2];
    int threads[MAX_NUM_THREADS];

    threadsSockets[0] = 0;
    
    if (argc == 1)
    {
    	HELP_MSG;
    	exit (EXIT_SUCCESS);
    }

    while ((c = getopt (argc, argv, "+c:hiM:ps:v")) != -1)
    {
        switch (c)
        {
            case 'c':
                CHECK_OPTION_STRING;
                numSockets = bstr_to_cpuset_physical((uint32_t*) threadsSockets, argString);
                bdestroy(argString);
                optSockets = 1;
                break;

            case 'h':
                HELP_MSG;
                exit (EXIT_SUCCESS);
            case 'i':
                optInfo = 1;
                break;
            case 'M':  /* Set MSR Access mode */
                CHECK_OPTION_STRING;
                accessClient_setaccessmode(str2int((char*) argString->data));
                bdestroy(argString);
                break;
            case 'p':
                optClock = 1;
                break;
            case 's':
                CHECK_OPTION_STRING;
                optStethoscope = str2int((char*) argString->data);
                bdestroy(argString);
                break;
            case 'v':
                VERSION_MSG;
                exit (EXIT_SUCCESS);
            case '?':
            	if (optopt == 's' || optopt == 'M' || optopt == 'c')
            	{
            		HELP_MSG;
            	}
                else if (isprint (optopt))
                {
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                }
                else
                {
                    fprintf (stderr,
                            "Unknown option character `\\x%x'.\n",
                            optopt);
                }
                exit( EXIT_FAILURE);
            default:
                HELP_MSG;
                exit (EXIT_SUCCESS);
        }
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }
    
    if (optClock && optind == argc)
    {
    	fprintf(stderr,"Commandline option -p requires an executable.\n");
    	exit(EXIT_FAILURE);
    }
    if (optSockets && !optStethoscope && optind == argc)
    {
    	fprintf(stderr,"Commandline option -c requires an executable if not used in combination with -s.\n");
    	exit(EXIT_FAILURE);
    }

    if (cpuid_init() == EXIT_FAILURE)
    {
        fprintf(stderr, "CPU not supported\n");
        exit(EXIT_FAILURE);
    }
    
    if (numSockets > cpuid_topology.numSockets)
    {
    	fprintf(stderr, "System has only %d sockets but %d are given on commandline\n",
    			cpuid_topology.numSockets, numSockets);
    	exit(EXIT_FAILURE);
    }

    numa_init(); /* consider NUMA node as power unit for the moment */
    accessClient_init(&socket_fd);
    msr_init(socket_fd);
    timer_init();

    /* check for supported processors */
    if ((cpuid_info.model == SANDYBRIDGE_EP) ||
            (cpuid_info.model == SANDYBRIDGE) ||
            (cpuid_info.model == IVYBRIDGE) ||
            (cpuid_info.model == IVYBRIDGE_EP) ||
            (cpuid_info.model == HASWELL) ||
            (cpuid_info.model == NEHALEM_BLOOMFIELD) ||
            (cpuid_info.model == NEHALEM_LYNNFIELD) ||
            (cpuid_info.model == NEHALEM_WESTMERE))
    {
        power_init(numa_info.nodes[0].processors[0]);
    }
    else
    {
        fprintf (stderr, "Query Turbo Mode only supported on Intel Nehalem/Westmere/SandyBridge/IvyBridge/Haswell processors!\n");
        exit(EXIT_FAILURE);
    }

    double clock = (double) timer_getCpuClock();

    printf(HLINE);
    printf("CPU name:\t%s \n",cpuid_info.name);
    printf("CPU clock:\t%3.2f GHz \n",  (float) clock * 1.E-09);
    printf(HLINE);

    if (optInfo)
    {
        if (power_info.turbo.numSteps != 0)
        {
            printf("Base clock:\t%.2f MHz \n",  power_info.baseFrequency );
            printf("Minimal clock:\t%.2f MHz \n",  power_info.minFrequency );
            printf("Turbo Boost Steps:\n");
            for (int i=0; i < power_info.turbo.numSteps; i++ )
            {
                printf("C%d %.2f MHz \n",i+1,  power_info.turbo.steps[i] );
            }
        }
        printf(HLINE);
    }

    if (cpuid_info.model == SANDYBRIDGE_EP)
    {
        hasDRAM = 1;
    }
    else if ((cpuid_info.model != SANDYBRIDGE) &&
            (cpuid_info.model != SANDYBRIDGE_EP)  &&
            (cpuid_info.model != IVYBRIDGE)  &&
            (cpuid_info.model != IVYBRIDGE_EP)  &&
            (cpuid_info.model != HASWELL))
    {
        fprintf (stderr, "RAPL not supported on this processor!\n");
        exit(EXIT_FAILURE);
    }

    if (optInfo)
    {
        printf("Thermal Spec Power: %g Watts \n", power_info.tdp );
        printf("Minimum  Power: %g Watts \n", power_info.minPower);
        printf("Maximum  Power: %g Watts \n", power_info.maxPower);
        printf("Maximum  Time Window: %g micro sec \n", power_info.maxTimeWindow);
        printf(HLINE);
        exit(EXIT_SUCCESS);
    }

    if (optClock)
    {
        affinity_init();
        argString = bformat("S%u:0-%u", threadsSockets[0], cpuid_topology.numCoresPerSocket-1);
        for (int i=1; i<numSockets; i++)
        {
            bstring tExpr = bformat("@S%u:0-%u", threadsSockets[i], cpuid_topology.numCoresPerSocket-1);
            bconcat(argString, tExpr);
        }
        numThreads = bstr_to_cpuset(threads, argString);
        bdestroy(argString);
        perfmon_init(numThreads, threads, stdout);
        perfmon_setupEventSet(eventString, NULL);
    }

    {
        PowerData pDataPkg[MAX_NUM_NODES*2];
        PowerData pDataDram[MAX_NUM_NODES*2];
        printf("Measure on sockets: %d", threadsSockets[0]);
        for (int i=1; i<numSockets; i++)
        {
            printf(", %d", threadsSockets[i]);
        }
        printf("\n");

        if (optStethoscope)
        {
            if (optClock)
            {
                perfmon_startCounters();
            }
            else
            {
                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM);
                    power_start(pDataPkg+i, cpuId, PKG);
                }
            }
            sleep(optStethoscope);

            if (optClock)
            {
                perfmon_stopCounters();
                perfmon_printCounterResults();
                perfmon_finalize();
            }
            else
            {
                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    power_stop(pDataPkg+i, cpuId, PKG);
                    if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM);
                }
            }
            runtime = (double) optStethoscope;
        }
        else
        {
            TimerData time;
            argv +=  optind;
            bstring exeString = bfromcstr(argv[0]);

            for (int i=1; i<(argc-optind); i++)
            {
                bconchar(exeString, ' ');
                bcatcstr(exeString, argv[i]);
            }
            printf("%s\n",bdata(exeString));


            if (optClock)
            {
                perfmon_startCounters();
            }
            else
            {
                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM);
                    power_start(pDataPkg+i, cpuId, PKG);
                }

                timer_start(&time);
            }

            if (system(bdata(exeString)) == EOF)
            {
                fprintf(stderr, "Failed to execute %s!\n", bdata(exeString));
                exit(EXIT_FAILURE);
            }

            if (optClock)
            {
                perfmon_stopCounters();
                perfmon_printCounterResults();
                perfmon_finalize();
            }
            else
            {
                timer_stop(&time);

                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    power_stop(pDataPkg+i, cpuId, PKG);
                    if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM);
                }
                runtime = timer_print(&time);
            }
        }

        if (!optClock)
        {
            printf("Runtime: %g second \n",runtime);
            printf(HLINE);
            for (int i=0; i<numSockets; i++)
            {
                printf("Socket %d\n",threadsSockets[i]);
                printf("Domain: PKG \n");
                printf("Energy consumed: %g Joules \n", power_printEnergy(pDataPkg+i));
                printf("Power consumed: %g Watts \n", power_printEnergy(pDataPkg+i) / runtime );
                if (hasDRAM)
                {
                    printf("Domain: DRAM \n");
                    printf("Energy consumed: %g Joules \n", power_printEnergy(pDataDram+i));
                    printf("Power consumed: %g Watts \n", power_printEnergy(pDataDram+i) / runtime );
                }
                printf("\n");
            }
        }
    }

#if 0
    if ( cpuid_hasFeature(TM2) )
    {
        thermal_init(0);
        printf("Current core temperatures:\n");

        for (uint32_t i = 0; i < cpuid_topology.numCoresPerSocket; i++ )
        {
            printf("Core %d: %u C\n",
                    numa_info.nodes[socketId].processors[i],
                    thermal_read(numa_info.nodes[socketId].processors[i]));
        }
    }
#endif

    msr_finalize();
    return EXIT_SUCCESS;
}
Пример #18
0
int main(int argn, char** argc)
{
    int err, i ,j;
    int numCPUs = 0;
    int gid;
    DATATYPE *a,*b,*c,*d;
    TimeData timer;
    double triad_time, copy_time, scale_time, stream_time;
    char estr[1024];
    double result, scalar = 3.0;
    char* ptr;

    if (argn != 3)
    {
        printf("Usage: %s <cpustr> <events>\n", argc[0]);
        return 1;
    }

    strcpy(estr, argc[2]);

    allocate_vector(&a, SIZE);
    allocate_vector(&b, SIZE);
    allocate_vector(&c, SIZE);
    allocate_vector(&d, SIZE);

    err = topology_init();
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's topology module\n");
        return 1;
    }
    CpuTopology_t topo = get_cpuTopology();
    affinity_init();
    int* cpus = (int*)malloc(topo->numHWThreads * sizeof(int));
    if (!cpus)
        return 1;
    numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads);
    omp_set_num_threads(numCPUs);
    err = perfmon_init(numCPUs, cpus);
    if (err < 0)
    {
        printf("Failed to initialize LIKWID's performance monitoring module\n");
        affinity_finalize();
        topology_finalize();
        return 1;
    }
    gid = perfmon_addEventSet(estr);
    if (gid < 0)
    {
        printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr);
        perfmon_finalize();
        affinity_finalize();
        topology_finalize();
        return 1;
    }

    err = perfmon_setupCounters(gid);
    if (err < 0)
    {
        printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
        perfmon_finalize();
        affinity_finalize();
        topology_finalize();
        return 1;
    }

#ifdef _OPENMP
    printf(HLINE);
#pragma omp parallel
    {
#pragma omp master
    {
        printf ("Number of Threads requested = %i\n",omp_get_num_threads());
    }
    likwid_pinThread(cpus[omp_get_thread_num()]);
    printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
    }
#endif

#pragma omp parallel for
    for (int j=0; j<SIZE; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
        d[j] = 1.0;
    }

    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("copy");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                c[j] = a[j];
            }
            LIKWID_MARKER_STOP("copy");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    copy_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }

    printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        copy_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));

    ptr = strtok(estr,",");
    j = 0;
    while (ptr != NULL)
    {
        for (i = 0;i < numCPUs; i++)
        {
            result = perfmon_getResult(gid, j, cpus[i]);
            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
        }
        ptr = strtok(NULL,",");
        j++;
    }
    strcpy(estr, argc[2]);
    perfmon_setupCounters(gid);

    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("scale");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                b[j] = scalar*c[j];
            }
            LIKWID_MARKER_STOP("scale");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    scale_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }

    printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        copy_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));

    ptr = strtok(estr,",");
    j = 0;
    while (ptr != NULL)
    {
        for (i = 0;i < numCPUs; i++)
        {
            result = perfmon_getResult(gid, j, cpus[i]);
            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
        }
        ptr = strtok(NULL,",");
        j++;
    }
    strcpy(estr, argc[2]);
    perfmon_setupCounters(gid);
    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {
            LIKWID_MARKER_START("stream");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                c[j] = a[j] + b[j];
            }
            LIKWID_MARKER_STOP("stream");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    stream_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }

    printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(2*SIZE*sizeof(DATATYPE)),
                        copy_time,
                        1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));

    ptr = strtok(estr,",");
    j = 0;
    while (ptr != NULL)
    {
        for (i = 0;i < numCPUs; i++)
        {
            result = perfmon_getResult(gid, j, cpus[i]);
            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
        }
        ptr = strtok(NULL,",");
        j++;
    }
    strcpy(estr, argc[2]);
    perfmon_setupCounters(gid);
    err = perfmon_startCounters();
    if (err < 0)
    {
        printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }
    time_start(&timer);
#pragma omp parallel
    {
        for (int k=0; k<ITER; k++)
        {

            LIKWID_MARKER_START("triad");
#pragma omp for
            for (int j=0; j<SIZE; j++)
            {
                a[j] = b[j] +  c[j] * scalar;
            }
            LIKWID_MARKER_STOP("triad");
        }
    }
    time_stop(&timer);
    err = perfmon_stopCounters();
    triad_time = time_print(&timer)/(double)ITER;
    if (err < 0)
    {
        printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
        perfmon_finalize();
        topology_finalize();
        return 1;
    }



    printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
                        1E-6*(4*SIZE*sizeof(DATATYPE)),
                        triad_time,
                        1E-6*((4*SIZE*sizeof(DATATYPE))/triad_time));
    ptr = strtok(estr,",");
    j = 0;
    while (ptr != NULL)
    {
        for (i = 0;i < numCPUs; i++)
        {
            result = perfmon_getResult(gid, j, cpus[i]);
            printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
        }
        ptr = strtok(NULL,",");
        j++;
    }

    perfmon_finalize();
    affinity_finalize();
    topology_finalize();
    return 0;
}
Пример #19
0
void
likwid_markerInit(void)
{
    int i;
    int verbosity;
    int setinit = 0;
    bstring bThreadStr;
    bstring bEventStr;
    struct bstrList* threadTokens;
    struct bstrList* eventStrings;
    char* modeStr = getenv("LIKWID_MODE");
    char* eventStr = getenv("LIKWID_EVENTS");
    char* cThreadStr = getenv("LIKWID_THREADS");
    char* filepath = getenv("LIKWID_FILEPATH");
    char* perfpid = getenv("LIKWID_PERF_EXECPID");
    char execpid[20];
    /* Dirty hack to avoid nonnull warnings */
    int (*ownatoi)(const char*);
    ownatoi = &atoi;

    if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL) && likwid_init == 0)
    {
        setinit = 1;
    }
    else if (likwid_init == 0)
    {
        fprintf(stderr, "Running without Marker API. Activate Marker API with -m on commandline.\n");
        return;
    }
    else
    {
        return;
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    topology_init();
    numa_init();
    affinity_init();
    hashTable_init();

//#ifndef LIKWID_USE_PERFEVENT
    HPMmode(atoi(modeStr));
//#endif
    if (getenv("LIKWID_DEBUG") != NULL)
    {
        perfmon_verbosity = atoi(getenv("LIKWID_DEBUG"));
        verbosity = perfmon_verbosity;
    }

    bThreadStr = bfromcstr(cThreadStr);
    threadTokens = bsplit(bThreadStr,',');
    num_cpus = threadTokens->qty;
    for (i=0; i<num_cpus; i++)
    {
        threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i]));
    }
    bdestroy(bThreadStr);
    bstrListDestroy(threadTokens);

    if (getenv("LIKWID_PIN") != NULL)
    {
        likwid_pinThread(threads2Cpu[0]);
        if (getenv("OMP_NUM_THREADS") != NULL)
        {
            if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
        if (getenv("CILK_NWORKERS") != NULL)
        {
            if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
    }
#ifdef LIKWID_USE_PERFEVENT
    if (perfpid != NULL)
    {
        snprintf(execpid, 19, "%d", getpid());
        setenv("LIKWID_PERF_PID", execpid, 1);
        char* perfflags = getenv("LIKWID_PERF_FLAGS");
        if (perfflags)
        {
            setenv("LIKWID_PERF_FLAGS", getenv("LIKWID_PERF_FLAGS"), 1);
        }
    }
#endif

    i = perfmon_init(num_cpus, threads2Cpu);
    if (i<0)
    {
        //fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n");
        return;
    }

    bEventStr = bfromcstr(eventStr);
    eventStrings = bsplit(bEventStr,'|');
    numberOfGroups = eventStrings->qty;
    groups = malloc(numberOfGroups * sizeof(int));
    if (!groups)
    {
        fprintf(stderr,"Cannot allocate space for group handling.\n");
        bstrListDestroy(eventStrings);
        exit(EXIT_FAILURE);
    }
    for (i=0; i<eventStrings->qty; i++)
    {
        groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i]));
    }
    bstrListDestroy(eventStrings);
    bdestroy(bEventStr);

    for (i=0; i<num_cpus; i++)
    {
        hashTable_initThread(threads2Cpu[i]);
        for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++)
        {
            groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE;
            groupSet->groups[groups[0]].state = STATE_START;
        }
    }
    if (setinit)
    {
        likwid_init = 1;
    }
    threads2Pthread[registered_cpus] = pthread_self();
    registered_cpus++;

    groupSet->activeGroup = 0;

    perfmon_setupCounters(groupSet->activeGroup);
    perfmon_startCounters();
}
Пример #20
0
int test_perfmonperfgroup()
{
    CpuInfo_t cpuinfo;
	int i;
    int cpu = 0;
    topology_init();
    cpuinfo = get_cpuInfo();
    int ret = perfmon_init(1, &cpu);
    if (ret != 0) {
        printf("Perfmon init failed\n");
        goto fail;
    }
	char** glist = NULL;
    char** slist = NULL;
    char** llist = NULL;
    ret = perfmon_getGroups(&glist, &slist, &llist);
    if (ret <= 0)
    {
        goto fail;
	}
	ret = perfmon_addEventSet(glist[0]);
    if (ret != 0) {
        printf("Perfmon addEventSet(%s) failed\n", glist[0]);
        goto fail;
    }
	if (perfmon_getNumberOfEvents(ret) == 0) {
        printf("Perfmon number of events == 0\n");
        goto fail;
    }
    if (perfmon_getNumberOfMetrics(ret) == 0) {
        printf("Perfmon number of metrics == 0\n");
        goto fail;
    }
	for (i=0; i<perfmon_getNumberOfEvents(ret); i++) {
		if (strcmp(perfmon_getEventName(ret, i), "") == 0)
			goto fail;
		if (strcmp(perfmon_getCounterName(ret, i), "") == 0)
			goto fail;
	}
	if (strcmp(perfmon_getGroupName(ret), "Custom") == 0)
    {
        goto fail;
    }
    if (strcmp(perfmon_getGroupInfoShort(ret), "Custom") == 0)
    {
        goto fail;
    }
    if (strcmp(perfmon_getGroupInfoLong(ret), "Custom") == 0)
    {
        goto fail;
    }
	if (perfmon_getLastTimeOfGroup(ret) != 0)
    {
        goto fail;
    }
    if (perfmon_getTimeOfGroup(ret) != 0)
    {
        goto fail;
    }
	for (i=0; i<perfmon_getNumberOfMetrics(ret); i++) {
		if (strcmp(perfmon_getMetricName(ret, i), "") == 0)
			goto fail;
		if (perfmon_getMetric(ret, i, 0) != 0.0)
			goto fail;
	}
	free(glist);
    free(slist);
    free(llist);
    perfmon_finalize();
    affinity_finalize();
    topology_finalize();
    return 1;
fail:
    if (glist)
        free(glist);
    if (slist)
        free(slist);
    if (llist)
        free(llist);
    perfmon_finalize();
    affinity_finalize();
    topology_finalize();
    return 0;
}
Пример #21
0
int test_perfmoncustomgroup()
{
    CpuInfo_t cpuinfo;
    int cpu = 0;
    topology_init();
    cpuinfo = get_cpuInfo();
    int ret = perfmon_init(1, &cpu);
    if (ret != 0) {
        printf("Perfmon init failed\n");
        goto fail;
    }
    ret = perfmon_addEventSet(eventset_ok);
    if (ret != 0) {
        printf("Perfmon addEventSet(ok) failed\n");
        goto fail;
    }
    if (perfmon_getNumberOfEvents(ret) != 3) {
        printf("Perfmon number of events != 3\n");
        goto fail;
    }
    if (perfmon_getNumberOfMetrics(ret) != 0) {
        printf("Perfmon number of metrics != 0\n");
        goto fail;
    }
    if (strcmp(perfmon_getEventName(ret, 0), event1_ok) != 0)
    {
        goto fail;
    }
    if (strcmp(perfmon_getEventName(ret, 1), event2_ok) != 0)
    {
        goto fail;
    }
    if (strcmp(perfmon_getEventName(ret, 2), event3_ok) != 0)
    {
        goto fail;
    }
    if (strcmp(perfmon_getCounterName(ret, 0), ctr1_ok) != 0)
    {
        goto fail;
    }
    if (strcmp(perfmon_getCounterName(ret, 1), ctr2_ok) != 0)
    {
        goto fail;
    }
    if (strcmp(perfmon_getCounterName(ret, 2), ctr3_ok) != 0)
    {
        goto fail;
    }

    if (strcmp(perfmon_getGroupName(ret), "Custom") != 0)
    {
        goto fail;
    }
    if (strcmp(perfmon_getGroupInfoShort(ret), "Custom") != 0)
    {
        goto fail;
    }
    if (strcmp(perfmon_getGroupInfoLong(ret), "Custom") != 0)
    {
        goto fail;
    }
    if (perfmon_getLastTimeOfGroup(ret) != 0)
    {
        goto fail;
    }
    perfmon_finalize();
    affinity_finalize();
    topology_finalize();
    return 1;
fail:
    perfmon_finalize();
    affinity_finalize();
    topology_finalize();
    return 0;
}
Пример #22
0
int test_perfmonaddeventset()
{
    char eventset_fail1[] = "INSTR_RETIRED.ANY:FIXC0";
    char eventset_fail2[] = "INSTR_RETIRED-ANY:FIXC0";
    CpuInfo_t cpuinfo;
    int cpu = 0;
    topology_init();
    cpuinfo = get_cpuInfo();
    if (cpuinfo->isIntel == 0)
    {
        topology_finalize();
        return 1;
    }
    int ret = perfmon_init(1, &cpu);
    if (ret != 0) {
        printf("Perfmon init failed\n");
        goto fail;
    }
    if (perfmon_getNumberOfGroups() != 0) {
        printf("Perfmon number of groups != 0\n");
        goto fail;
    }
    if (perfmon_getNumberOfThreads() != 1) {
        printf("Perfmon number of threads != 1\n");
        goto fail;
    }
    if (perfmon_getIdOfActiveGroup() != -1) {
        printf("Perfmon id of active group != -1\n");
        goto fail;
    }
    ret = perfmon_addEventSet(eventset_ok);
    if (ret != 0) {
        printf("Perfmon addEventSet(ok) failed\n");
        goto fail;
    }
    if (perfmon_getNumberOfGroups() != 1) {
        printf("Perfmon number of groups != 1\n");
        goto fail;
    }
    if (perfmon_getNumberOfEvents(ret) != 3) {
        printf("Perfmon number of events != 3\n");
        goto fail;
    }
    if (perfmon_getIdOfActiveGroup() != -1) {
        printf("Perfmon id of active group != -1\n");
        goto fail;
    }
    ret = perfmon_addEventSet(eventset_option);
    if (ret != 1) {
        printf("Perfmon addEventSet(options) failed\n");
        goto fail;
    }
    if (perfmon_getNumberOfGroups() != 2) {
        printf("Perfmon number of groups != 2\n");
        goto fail;
    }
    if (perfmon_getNumberOfEvents(ret) != 3) {
        printf("Perfmon number of events != 3\n");
        goto fail;
    }
    if (perfmon_getIdOfActiveGroup() != -1) {
        printf("Perfmon id of active group != -1\n");
        goto fail;
    }
    ret = perfmon_addEventSet(eventset_fail1);
    if (ret >= 0) {
        printf("Perfmon addEventSet(fail1) failed\n");
        goto fail;
    }
    if (perfmon_getNumberOfGroups() != 2) {
        printf("Perfmon number of groups != 2\n");
        goto fail;
    }
    ret = perfmon_addEventSet(eventset_fail2);
    if (ret >= 0) {
        printf("Perfmon addEventSet(fail2) failed\n");
        goto fail;
    }
    if (perfmon_getNumberOfGroups() != 2) {
        printf("Perfmon number of groups != 2\n");
        goto fail;
    }
    if (perfmon_getIdOfActiveGroup() != -1) {
        printf("Perfmon id of active group != -1\n");
        goto fail;
    }
    perfmon_finalize();
    affinity_finalize();
    topology_finalize();
    return 1;
fail:
    perfmon_finalize();
    affinity_finalize();
    topology_finalize();
    return 0;
}
Пример #23
0
void likwid_markerInit(void)
{
    int i;
    int verbosity;
    bstring bThreadStr;
    bstring bEventStr;
    struct bstrList* threadTokens;
    struct bstrList* eventStrings;
    char* modeStr = getenv("LIKWID_MODE");
    char* eventStr = getenv("LIKWID_EVENTS");
    char* cThreadStr = getenv("LIKWID_THREADS");
    char* filepath = getenv("LIKWID_FILEPATH");
    /* Dirty hack to avoid nonnull warnings */
    int (*ownatoi)(const char*);
    ownatoi = &atoi;

    if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL))
    {
        likwid_init = 1;
    }
    else if (likwid_init == 0)
    {
        fprintf(stderr, "Cannot initalize LIKWID marker API, environment variables are not set\n");
        fprintf(stderr, "You have to set the -m commandline switch for likwid-perfctr\n");
        return;
    }
    else
    {
        return;
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    topology_init();
    numa_init();
    affinity_init();
    hashTable_init();

    for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT;

    HPMmode(atoi(modeStr));

    if (getenv("LIKWID_DEBUG") != NULL)
    {
        perfmon_verbosity = atoi(getenv("LIKWID_DEBUG"));
        verbosity = perfmon_verbosity;
    }

    bThreadStr = bfromcstr(cThreadStr);
    threadTokens = bstrListCreate();
    threadTokens = bsplit(bThreadStr,',');
    num_cpus = threadTokens->qty;
    for (i=0; i<num_cpus; i++)
    {
        threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i]));
    }
    bdestroy(bThreadStr);
    bstrListDestroy(threadTokens);
    
    if (getenv("LIKWID_PIN") != NULL)
    {
        likwid_pinThread(threads2Cpu[0]);
        if (getenv("OMP_NUM_THREADS") != NULL)
        {
            if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
        if (getenv("CILK_NWORKERS") != NULL)
        {
            if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
    }

    i = perfmon_init(num_cpus, threads2Cpu);
    if (i<0)
    {
        fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n");
        return;
    }

    bEventStr = bfromcstr(eventStr);
    eventStrings = bstrListCreate();
    eventStrings = bsplit(bEventStr,'|');
    numberOfGroups = eventStrings->qty;
    groups = malloc(numberOfGroups * sizeof(int));
    if (!groups)
    {
        fprintf(stderr,"Cannot allocate space for group handling.\n");
        bstrListDestroy(eventStrings);
        exit(EXIT_FAILURE);
    }
    for (i=0; i<eventStrings->qty; i++)
    {
        groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i]));
    }
    bstrListDestroy(eventStrings);
    bdestroy(bEventStr);

    for (i=0; i<num_cpus; i++)
    {
        hashTable_initThread(threads2Cpu[i]);
        for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++)
        {
            groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE;
        }
    }

    groupSet->activeGroup = 0;
}