Esempio n. 1
0
/*
 * Daemon entry point.
 *
 * Forks once so the launching process can return immediately, puts the
 * child into its own session with setsid(), and then loops forever,
 * (re)starting every process slot whose flag is 0.
 *
 * Exit status of the launching process:
 *   EXIT_SUCCESS - the child was forked and continues as the daemon.
 *   EXIT_FAILURE - fork() failed, so no daemon exists.
 * The daemon itself exits with EXIT_FAILURE if setsid() or
 * start_process() fails.
 */
int
main(int argc, char *argv[])
{
    pid_t pid = fork();
    if (pid < 0) {
        /* fork() failed: nothing was started, do not report success. */
        exit(EXIT_FAILURE);
    }
    if (pid != 0) {
        /* Parent: the child carries on as the daemon. */
        exit(EXIT_SUCCESS);
    }
    if (setsid() == -1) {
        exit(EXIT_FAILURE);
    }
    lock_check("mlock");
    cmdline_parse(argc, argv);
    bzero(&procs, sizeof(procs));
    /* NOTE(review): presumably the handler updates procs.agents[].flag
     * when a child exits - confirm in sigchld_handler. */
    sig_catch(SIGCHLD, sigchld_handler);
    while (1) {
        int i, ret;
        /* Slots are indexed 1..proc_num (inclusive) by this loop. */
        for (i = 1; i <= proc_num; i++) {
            if (procs.agents[i].flag == 0) {
                ret = start_process(i);
                if (ret != 0) {
                    /* A slot that cannot be started is fatal. */
                    log_error("start process failed");
                    exit(EXIT_FAILURE);
                }
            }
        }
        /* Poll interval between supervision passes. */
        sleep(5);
    }
    return 0;
}
Esempio n. 2
0
/*
 * Lock a two-page anonymous mapping with mlock2_(flags = 0), verify it
 * with lock_check(), then munlock() it and verify the result with
 * unlock_lock_check().
 *
 * Returns the result of unlock_lock_check() when every earlier step
 * succeeded, and 1 otherwise. If mlock2_() fails with ENOSYS the whole
 * process exits with status 0 (test skipped).
 */
static int test_mlock_lock()
{
	char *map;
	int ret = 1;
	unsigned long page_size = getpagesize();

	/* fd is -1: portable code must not pass a real fd with MAP_ANONYMOUS */
	map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (map == MAP_FAILED) {
		perror("test_mlock_locked mmap");
		goto out;
	}

	if (mlock2_(map, 2 * page_size, 0)) {
		if (errno == ENOSYS) {
			printf("Cannot call new mlock family, skipping test\n");
			_exit(0);
		}
		perror("mlock2(0)");
		goto unmap;
	}

	/* A non-zero result from lock_check() is treated as failure. */
	if (lock_check(map))
		goto unmap;

	/* Now unlock and recheck attributes */
	if (munlock(map, 2 * page_size)) {
		perror("munlock()");
		goto unmap;
	}

	ret = unlock_lock_check(map);

unmap:
	munmap(map, 2 * page_size);
out:
	return ret;
}
Esempio n. 3
0
/*
 * Initialize the LIKWID marker API for the calling process.
 *
 * Configuration comes from the environment:
 *   LIKWID_MODE      access mode, passed to HPMmode() as an integer
 *   LIKWID_EVENTS    event sets separated by '|'
 *   LIKWID_THREADS   CPU ids separated by ','
 *   LIKWID_FILEPATH  only checked for presence here
 *   LIKWID_DEBUG     optional, sets perfmon_verbosity
 *   LIKWID_PIN       optional, pins the caller to the first listed CPU
 *
 * If any of the four mandatory variables is missing the function returns
 * without initializing anything; a diagnostic is printed only when
 * likwid_init is still 0. The process is terminated with EXIT_FAILURE
 * when the counters are locked or the group array cannot be allocated.
 */
void likwid_markerInit(void)
{
    int i;
    bstring bThreadStr;
    bstring bEventStr;
    struct bstrList* threadTokens;
    struct bstrList* eventStrings;
    char* modeStr = getenv("LIKWID_MODE");
    char* eventStr = getenv("LIKWID_EVENTS");
    char* cThreadStr = getenv("LIKWID_THREADS");
    char* filepath = getenv("LIKWID_FILEPATH");
    /* Dirty hack to avoid nonnull warnings */
    int (*ownatoi)(const char*);
    ownatoi = &atoi;

    if ((modeStr != NULL) && (filepath != NULL) && (eventStr != NULL) && (cThreadStr != NULL))
    {
        likwid_init = 1;
    }
    else if (likwid_init == 0)
    {
        fprintf(stderr, "Cannot initalize LIKWID marker API, environment variables are not set\n");
        fprintf(stderr, "You have to set the -m commandline switch for likwid-perfctr\n");
        return;
    }
    else
    {
        return;
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    topology_init();
    numa_init();
    affinity_init();
    hashTable_init();

    for (i=0; i<MAX_NUM_NODES; i++)
    {
        socket_lock[i] = LOCK_INIT;
    }

    HPMmode(atoi(modeStr));

    if (getenv("LIKWID_DEBUG") != NULL)
    {
        perfmon_verbosity = atoi(getenv("LIKWID_DEBUG"));
    }

    /* bsplit() allocates the list itself; creating one beforehand only
     * leaked it when the pointer was overwritten. */
    bThreadStr = bfromcstr(cThreadStr);
    threadTokens = bsplit(bThreadStr,',');
    num_cpus = threadTokens->qty;
    for (i=0; i<num_cpus; i++)
    {
        threads2Cpu[i] = ownatoi(bdata(threadTokens->entry[i]));
    }
    bdestroy(bThreadStr);
    bstrListDestroy(threadTokens);

    if (getenv("LIKWID_PIN") != NULL)
    {
        likwid_pinThread(threads2Cpu[0]);
        /* More runtime threads than measured CPUs: enable locking. */
        if (getenv("OMP_NUM_THREADS") != NULL)
        {
            if (ownatoi(getenv("OMP_NUM_THREADS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
        if (getenv("CILK_NWORKERS") != NULL)
        {
            if (ownatoi(getenv("CILK_NWORKERS")) > num_cpus)
            {
                use_locks = 1;
            }
        }
    }

    i = perfmon_init(num_cpus, threads2Cpu);
    if (i<0)
    {
        fprintf(stderr,"Failed to initialize LIKWID perfmon library.\n");
        return;
    }

    /* One group id per '|'-separated event set. */
    bEventStr = bfromcstr(eventStr);
    eventStrings = bsplit(bEventStr,'|');
    numberOfGroups = eventStrings->qty;
    groups = malloc(numberOfGroups * sizeof(int));
    if (!groups)
    {
        fprintf(stderr,"Cannot allocate space for group handling.\n");
        bstrListDestroy(eventStrings);
        bdestroy(bEventStr);
        exit(EXIT_FAILURE);
    }
    for (i=0; i<eventStrings->qty; i++)
    {
        groups[i] = perfmon_addEventSet(bdata(eventStrings->entry[i]));
    }
    bstrListDestroy(eventStrings);
    bdestroy(bEventStr);

    /* Mark every event of the first group as initialized on each CPU. */
    for (i=0; i<num_cpus; i++)
    {
        hashTable_initThread(threads2Cpu[i]);
        for(int j=0; j<groupSet->groups[groups[0]].numberOfEvents;j++)
        {
            groupSet->groups[groups[0]].events[j].threadCounter[i].init = TRUE;
        }
    }

    groupSet->activeGroup = 0;
}
Esempio n. 4
0
void likwid_markerInit(void)
{
    int cpuId = likwid_getProcessorId();
    char* modeStr = getenv("LIKWID_MODE");
    char* maskStr = getenv("LIKWID_MASK");

    if ((modeStr != NULL) && (maskStr != NULL))
    {
        likwid_init = 1;
    }
    else
    {
        return;
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    cpuid_init();
    numa_init();
    affinity_init();
    timer_init();
    hashTable_init();

    for(int i=0; i<MAX_NUM_THREADS; i++) thread_socketFD[i] = -1;
    for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT;

    accessClient_mode = atoi(modeStr);
    str2BitMask(maskStr, &counterMask);

    if (accessClient_mode != DAEMON_AM_DIRECT)
    {
        accessClient_init(&thread_socketFD[cpuId]);
    }

    msr_init(thread_socketFD[cpuId]);
    thermal_init(cpuId);

    switch ( cpuid_info.family )
    {
        case P6_FAMILY:

            switch ( cpuid_info.model )
            {
                case PENTIUM_M_BANIAS:

                case PENTIUM_M_DOTHAN:

                    perfmon_counter_map = pm_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_PM;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_PM;
                    break;

                case ATOM_45:

                case ATOM_32:

                case ATOM_22:

                case ATOM:

                    perfmon_counter_map = core2_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_CORE2;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_CORE2;
                    break;

                case CORE_DUO:
                    ERROR_PLAIN_PRINT(Unsupported Processor);
                    break;

                case XEON_MP:

                case CORE2_65:

                case CORE2_45:

                    perfmon_counter_map = core2_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_CORE2;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_CORE2;
                    break;

                case NEHALEM_EX:

                case WESTMERE_EX:

                    perfmon_counter_map = westmereEX_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_WESTMEREEX;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_WESTMEREEX;
                    perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_WESTMEREEX;
                    break;

                case NEHALEM_BLOOMFIELD:

                case NEHALEM_LYNNFIELD:

                case NEHALEM_WESTMERE_M:

                case NEHALEM_WESTMERE:

                    perfmon_counter_map = nehalem_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_NEHALEM;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_NEHALEM;
                    perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_NEHALEM;
                    break;

                case IVYBRIDGE:

                case IVYBRIDGE_EP:

                    {
                        int socket_fd = thread_socketFD[cpuId];
                        hasPCICounters = 1;
                        power_init(0); /* FIXME Static coreId is dangerous */
                        pci_init(socket_fd);
                        perfmon_counter_map = ivybridge_counter_map;
                        perfmon_numCounters = NUM_COUNTERS_IVYBRIDGE;
                        perfmon_numCountersCore = NUM_COUNTERS_CORE_IVYBRIDGE;
                        perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_IVYBRIDGE;
                    }
                    break;

                case HASWELL:

                case HASWELL_EX:

                case HASWELL_M1:

                case HASWELL_M2:

                    power_init(0); /* FIXME Static coreId is dangerous */

                    perfmon_counter_map = haswell_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_HASWELL;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_HASWELL;
                    break;

                case SANDYBRIDGE:

                case SANDYBRIDGE_EP:

                    {
                        int socket_fd = thread_socketFD[cpuId];
                        hasPCICounters = 1;
                        power_init(0); /* FIXME Static coreId is dangerous */
                        pci_init(socket_fd);
                        perfmon_counter_map = sandybridge_counter_map;
                        perfmon_numCounters = NUM_COUNTERS_SANDYBRIDGE;
                        perfmon_numCountersCore = NUM_COUNTERS_CORE_SANDYBRIDGE;
                        perfmon_numCountersUncore = NUM_COUNTERS_UNCORE_SANDYBRIDGE;
                    }
                    break;

                default:
                    ERROR_PLAIN_PRINT(Unsupported Processor);
                    break;
            }
            break;

        case MIC_FAMILY:

            switch ( cpuid_info.model )
            {
                case XEON_PHI:

                    perfmon_counter_map = phi_counter_map;
                    perfmon_numCounters = NUM_COUNTERS_PHI;
                    perfmon_numCountersCore = NUM_COUNTERS_CORE_PHI;
                    break;

                default:
                    ERROR_PLAIN_PRINT(Unsupported Processor);
                    break;
            }
            break;

        case K8_FAMILY:

            perfmon_counter_map = k10_counter_map;
            perfmon_numCounters = NUM_COUNTERS_K10;
            perfmon_numCountersCore = NUM_COUNTERS_CORE_K10;
            break;

        case K10_FAMILY:

            perfmon_counter_map = k10_counter_map;
            perfmon_numCounters = NUM_COUNTERS_K10;
            perfmon_numCountersCore = NUM_COUNTERS_CORE_K10;
            break;

        case K15_FAMILY:

            perfmon_counter_map = interlagos_counter_map;
            perfmon_numCounters = NUM_COUNTERS_INTERLAGOS;
            perfmon_numCountersCore = NUM_COUNTERS_CORE_INTERLAGOS;
            break;

        case K16_FAMILY:

            perfmon_counter_map = kabini_counter_map;
            perfmon_numCounters = NUM_COUNTERS_KABINI;
            perfmon_numCountersCore = NUM_COUNTERS_CORE_KABINI;
            break;

        default:
            ERROR_PLAIN_PRINT(Unsupported Processor);
            break;
    }
}
Esempio n. 5
0
/*
 * Exercise munlockall() after three different mlockall() modes:
 *   1. MCL_CURRENT               on a first two-page anonymous mapping
 *   2. MCL_CURRENT | MCL_ONFAULT on a fresh two-page anonymous mapping
 *   3. MCL_CURRENT | MCL_FUTURE  on that same second mapping
 * After each lock the mapping is verified with the matching check helper,
 * then munlockall() is called and the unlocked state is verified.
 *
 * Returns the result of the final unlock_lock_check() when every earlier
 * step succeeded, and 1 otherwise. Every exit path that still owns a
 * mapping releases it, and munlockall() is always called before returning.
 */
static int test_munlockall()
{
	char *map;
	int ret = 1;
	unsigned long page_size = getpagesize();

	/* fd is -1: portable code must not pass a real fd with MAP_ANONYMOUS */
	map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

	if (map == MAP_FAILED) {
		perror("test_munlockall mmap");
		goto out;
	}

	if (mlockall(MCL_CURRENT)) {
		perror("mlockall(MCL_CURRENT)");
		/* The mapping exists at this point, so it must be released. */
		goto unmap;
	}

	if (lock_check(map))
		goto unmap;

	if (munlockall()) {
		perror("munlockall()");
		goto unmap;
	}

	if (unlock_lock_check(map))
		goto unmap;

	/* Start the on-fault phase from a fresh mapping. */
	munmap(map, 2 * page_size);

	map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

	if (map == MAP_FAILED) {
		perror("test_munlockall second mmap");
		/* Nothing is mapped any more, so skip the unmap step. */
		goto out;
	}

	if (mlockall(MCL_CURRENT | MCL_ONFAULT)) {
		perror("mlockall(MCL_CURRENT | MCL_ONFAULT)");
		goto unmap;
	}

	if (onfault_check(map))
		goto unmap;

	if (munlockall()) {
		perror("munlockall()");
		goto unmap;
	}

	if (unlock_onfault_check(map))
		goto unmap;

	if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
		perror("mlockall(MCL_CURRENT | MCL_FUTURE)");
		/* The second mapping is still live here. */
		goto unmap;
	}

	if (lock_check(map))
		goto unmap;

	if (munlockall()) {
		perror("munlockall()");
		goto unmap;
	}

	ret = unlock_lock_check(map);

unmap:
	munmap(map, 2 * page_size);
out:
	munlockall();
	return ret;
}
Esempio n. 6
0
/*
 * Initialize the LIKWID marker API, then set up and start the counters
 * of the first event group.
 *
 * Runs at most once: returns immediately when likwid_init is already
 * non-zero. Requires LIKWID_MODE, LIKWID_EVENTS, LIKWID_THREADS and
 * LIKWID_FILEPATH in the environment; when one is missing a notice is
 * printed and nothing is initialized. likwid_init is only set after
 * perfmon_init() and the event-set registration have completed.
 *
 * Terminates the process with EXIT_FAILURE when the counters are locked
 * or the group array cannot be allocated. Returns silently when
 * perfmon_init() reports a negative status.
 */
void
likwid_markerInit(void)
{
    int idx;
    int status;
    bstring bThreadStr;
    bstring bEventStr;
    struct bstrList* threadTokens;
    struct bstrList* eventStrings;
    char* modeStr = getenv("LIKWID_MODE");
    char* eventStr = getenv("LIKWID_EVENTS");
    char* cThreadStr = getenv("LIKWID_THREADS");
    char* filepath = getenv("LIKWID_FILEPATH");
    char* perfpid = getenv("LIKWID_PERF_EXECPID");
    char* debugStr;
    char* workerStr;
    char execpid[20];
    /* Dirty hack to avoid nonnull warnings */
    int (*ownatoi)(const char*);
    ownatoi = &atoi;

    /* Already initialized: nothing to do and nothing to report. */
    if (likwid_init != 0)
    {
        return;
    }

    if ((modeStr == NULL) || (filepath == NULL) || (eventStr == NULL) || (cThreadStr == NULL))
    {
        fprintf(stderr, "Running without Marker API. Activate Marker API with -m on commandline.\n");
        return;
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    topology_init();
    numa_init();
    affinity_init();
    hashTable_init();

    /* Applied unconditionally, also in LIKWID_USE_PERFEVENT builds. */
    HPMmode(atoi(modeStr));

    debugStr = getenv("LIKWID_DEBUG");
    if (debugStr != NULL)
    {
        perfmon_verbosity = atoi(debugStr);
    }

    /* LIKWID_THREADS is a comma-separated list of CPU ids. */
    bThreadStr = bfromcstr(cThreadStr);
    threadTokens = bsplit(bThreadStr,',');
    num_cpus = threadTokens->qty;
    for (idx = 0; idx < num_cpus; idx++)
    {
        threads2Cpu[idx] = ownatoi(bdata(threadTokens->entry[idx]));
    }
    bdestroy(bThreadStr);
    bstrListDestroy(threadTokens);

    if (getenv("LIKWID_PIN") != NULL)
    {
        likwid_pinThread(threads2Cpu[0]);

        /* More runtime threads than measured CPUs: enable locking. */
        workerStr = getenv("OMP_NUM_THREADS");
        if ((workerStr != NULL) && (ownatoi(workerStr) > num_cpus))
        {
            use_locks = 1;
        }
        workerStr = getenv("CILK_NWORKERS");
        if ((workerStr != NULL) && (ownatoi(workerStr) > num_cpus))
        {
            use_locks = 1;
        }
    }
#ifdef LIKWID_USE_PERFEVENT
    if (perfpid != NULL)
    {
        /* Export this process' pid as LIKWID_PERF_PID. */
        snprintf(execpid, 19, "%d", getpid());
        setenv("LIKWID_PERF_PID", execpid, 1);
        char* perfflags = getenv("LIKWID_PERF_FLAGS");
        if (perfflags)
        {
            setenv("LIKWID_PERF_FLAGS", perfflags, 1);
        }
    }
#endif

    status = perfmon_init(num_cpus, threads2Cpu);
    if (status < 0)
    {
        return;
    }

    /* LIKWID_EVENTS holds one event set per '|'-separated token. */
    bEventStr = bfromcstr(eventStr);
    eventStrings = bsplit(bEventStr,'|');
    numberOfGroups = eventStrings->qty;
    groups = malloc(numberOfGroups * sizeof(int));
    if (!groups)
    {
        fprintf(stderr,"Cannot allocate space for group handling.\n");
        bstrListDestroy(eventStrings);
        exit(EXIT_FAILURE);
    }
    for (idx = 0; idx < eventStrings->qty; idx++)
    {
        groups[idx] = perfmon_addEventSet(bdata(eventStrings->entry[idx]));
    }
    bstrListDestroy(eventStrings);
    bdestroy(bEventStr);

    /* Flag every event of the first group as initialized on each CPU. */
    for (idx = 0; idx < num_cpus; idx++)
    {
        hashTable_initThread(threads2Cpu[idx]);
        for (int ev = 0; ev < groupSet->groups[groups[0]].numberOfEvents; ev++)
        {
            groupSet->groups[groups[0]].events[ev].threadCounter[idx].init = TRUE;
            groupSet->groups[groups[0]].state = STATE_START;
        }
    }

    /* Reaching this point implies the environment was complete. */
    likwid_init = 1;

    /* Register the calling thread. */
    threads2Pthread[registered_cpus] = pthread_self();
    registered_cpus++;

    groupSet->activeGroup = 0;

    perfmon_setupCounters(groupSet->activeGroup);
    perfmon_startCounters();
}
Esempio n. 7
0
/*
 * Power meter command-line tool.
 *
 * Options (getopt string "+c:hiM:ps:v"):
 *   -c <list>  sockets to measure
 *   -h         print help and exit
 *   -i         print turbo and power information, then exit
 *   -M <mode>  set the MSR access mode
 *   -p         measure the CLOCK event set via perfmon instead of energy
 *   -s <sec>   stethoscope mode: measure for <sec> seconds
 *   -v         print version and exit
 * Without -s (and without -i) the remaining arguments are joined into one
 * command line that is run through system() while measuring.
 *
 * Returns EXIT_SUCCESS after a completed measurement; every error path
 * terminates the process with exit(EXIT_FAILURE).
 */
int main (int argc, char** argv)
{
    int socket_fd = -1;
    int optInfo = 0;
    int optClock = 0;
    int optStethoscope = 0;
    int optSockets = 0;
    double runtime = 0.0;
    int hasDRAM = 0;
    int c;
    bstring argString;
    bstring eventString = bfromcstr("CLOCK");
    int numSockets=1;
    int numThreads=0;
    int threadsSockets[MAX_NUM_NODES*2];
    int threads[MAX_NUM_THREADS];

    /* Default: measure socket 0 only. */
    threadsSockets[0] = 0;

    if (argc == 1)
    {
        HELP_MSG;
        exit (EXIT_SUCCESS);
    }

    while ((c = getopt (argc, argv, "+c:hiM:ps:v")) != -1)
    {
        switch (c)
        {
            case 'c':
                CHECK_OPTION_STRING;
                numSockets = bstr_to_cpuset_physical((uint32_t*) threadsSockets, argString);
                bdestroy(argString);
                optSockets = 1;
                break;

            case 'h':
                HELP_MSG;
                exit (EXIT_SUCCESS);
            case 'i':
                optInfo = 1;
                break;
            case 'M':  /* Set MSR Access mode */
                CHECK_OPTION_STRING;
                accessClient_setaccessmode(str2int((char*) argString->data));
                bdestroy(argString);
                break;
            case 'p':
                optClock = 1;
                break;
            case 's':
                CHECK_OPTION_STRING;
                optStethoscope = str2int((char*) argString->data);
                bdestroy(argString);
                break;
            case 'v':
                VERSION_MSG;
                exit (EXIT_SUCCESS);
            case '?':
                if (optopt == 's' || optopt == 'M' || optopt == 'c')
                {
                    HELP_MSG;
                }
                else if (isprint (optopt))
                {
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                }
                else
                {
                    fprintf (stderr,
                            "Unknown option character `\\x%x'.\n",
                            optopt);
                }
                exit( EXIT_FAILURE);
            default:
                HELP_MSG;
                exit (EXIT_SUCCESS);
        }
    }

    if (!lock_check())
    {
        fprintf(stderr,"Access to performance counters is locked.\n");
        exit(EXIT_FAILURE);
    }

    if (optClock && optind == argc)
    {
        fprintf(stderr,"Commandline option -p requires an executable.\n");
        exit(EXIT_FAILURE);
    }
    if (optSockets && !optStethoscope && optind == argc)
    {
        fprintf(stderr,"Commandline option -c requires an executable if not used in combination with -s.\n");
        exit(EXIT_FAILURE);
    }
    /* Wrapper mode needs a command: without this check argv[optind] is
     * NULL and would be handed to bfromcstr(), printf() and system(). */
    if (!optInfo && !optStethoscope && optind == argc)
    {
        fprintf(stderr,"No executable given on commandline.\n");
        exit(EXIT_FAILURE);
    }

    if (cpuid_init() == EXIT_FAILURE)
    {
        fprintf(stderr, "CPU not supported\n");
        exit(EXIT_FAILURE);
    }

    if (numSockets > cpuid_topology.numSockets)
    {
        fprintf(stderr, "System has only %d sockets but %d are given on commandline\n",
                cpuid_topology.numSockets, numSockets);
        exit(EXIT_FAILURE);
    }

    numa_init(); /* consider NUMA node as power unit for the moment */
    accessClient_init(&socket_fd);
    msr_init(socket_fd);
    timer_init();

    /* check for supported processors */
    if ((cpuid_info.model == SANDYBRIDGE_EP) ||
            (cpuid_info.model == SANDYBRIDGE) ||
            (cpuid_info.model == IVYBRIDGE) ||
            (cpuid_info.model == IVYBRIDGE_EP) ||
            (cpuid_info.model == HASWELL) ||
            (cpuid_info.model == NEHALEM_BLOOMFIELD) ||
            (cpuid_info.model == NEHALEM_LYNNFIELD) ||
            (cpuid_info.model == NEHALEM_WESTMERE))
    {
        power_init(numa_info.nodes[0].processors[0]);
    }
    else
    {
        fprintf (stderr, "Query Turbo Mode only supported on Intel Nehalem/Westmere/SandyBridge/IvyBridge/Haswell processors!\n");
        exit(EXIT_FAILURE);
    }

    double clock = (double) timer_getCpuClock();

    printf(HLINE);
    printf("CPU name:\t%s \n",cpuid_info.name);
    printf("CPU clock:\t%3.2f GHz \n",  (float) clock * 1.E-09);
    printf(HLINE);

    if (optInfo)
    {
        if (power_info.turbo.numSteps != 0)
        {
            printf("Base clock:\t%.2f MHz \n",  power_info.baseFrequency );
            printf("Minimal clock:\t%.2f MHz \n",  power_info.minFrequency );
            printf("Turbo Boost Steps:\n");
            for (int i=0; i < power_info.turbo.numSteps; i++ )
            {
                printf("C%d %.2f MHz \n",i+1,  power_info.turbo.steps[i] );
            }
        }
        printf(HLINE);
    }

    /* Only SANDYBRIDGE_EP gets the DRAM domain; the Nehalem models that
     * passed the turbo check above are rejected here. */
    if (cpuid_info.model == SANDYBRIDGE_EP)
    {
        hasDRAM = 1;
    }
    else if ((cpuid_info.model != SANDYBRIDGE) &&
            (cpuid_info.model != SANDYBRIDGE_EP)  &&
            (cpuid_info.model != IVYBRIDGE)  &&
            (cpuid_info.model != IVYBRIDGE_EP)  &&
            (cpuid_info.model != HASWELL))
    {
        fprintf (stderr, "RAPL not supported on this processor!\n");
        exit(EXIT_FAILURE);
    }

    if (optInfo)
    {
        printf("Thermal Spec Power: %g Watts \n", power_info.tdp );
        printf("Minimum  Power: %g Watts \n", power_info.minPower);
        printf("Maximum  Power: %g Watts \n", power_info.maxPower);
        printf("Maximum  Time Window: %g micro sec \n", power_info.maxTimeWindow);
        printf(HLINE);
        exit(EXIT_SUCCESS);
    }

    if (optClock)
    {
        /* Build "S<a>:0-<n>@S<b>:0-<n>..." covering the cores of every
         * selected socket. */
        affinity_init();
        argString = bformat("S%u:0-%u", threadsSockets[0], cpuid_topology.numCoresPerSocket-1);
        for (int i=1; i<numSockets; i++)
        {
            bstring tExpr = bformat("@S%u:0-%u", threadsSockets[i], cpuid_topology.numCoresPerSocket-1);
            bconcat(argString, tExpr);
            /* Release the per-socket fragment once it has been appended. */
            bdestroy(tExpr);
        }
        numThreads = bstr_to_cpuset(threads, argString);
        bdestroy(argString);
        perfmon_init(numThreads, threads, stdout);
        perfmon_setupEventSet(eventString, NULL);
    }

    {
        PowerData pDataPkg[MAX_NUM_NODES*2];
        PowerData pDataDram[MAX_NUM_NODES*2];
        printf("Measure on sockets: %d", threadsSockets[0]);
        for (int i=1; i<numSockets; i++)
        {
            printf(", %d", threadsSockets[i]);
        }
        printf("\n");

        if (optStethoscope)
        {
            if (optClock)
            {
                perfmon_startCounters();
            }
            else
            {
                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM);
                    power_start(pDataPkg+i, cpuId, PKG);
                }
            }
            sleep(optStethoscope);

            if (optClock)
            {
                perfmon_stopCounters();
                perfmon_printCounterResults();
                perfmon_finalize();
            }
            else
            {
                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    power_stop(pDataPkg+i, cpuId, PKG);
                    if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM);
                }
            }
            runtime = (double) optStethoscope;
        }
        else
        {
            TimerData time;
            /* Join the remaining arguments into one shell command line. */
            argv +=  optind;
            bstring exeString = bfromcstr(argv[0]);

            for (int i=1; i<(argc-optind); i++)
            {
                bconchar(exeString, ' ');
                bcatcstr(exeString, argv[i]);
            }
            printf("%s\n",bdata(exeString));


            if (optClock)
            {
                perfmon_startCounters();
            }
            else
            {
                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    if (hasDRAM) power_start(pDataDram+i, cpuId, DRAM);
                    power_start(pDataPkg+i, cpuId, PKG);
                }

                timer_start(&time);
            }

            /* system() reports a failure to create the child as -1;
             * EOF is only guaranteed to be some negative value. */
            if (system(bdata(exeString)) == -1)
            {
                fprintf(stderr, "Failed to execute %s!\n", bdata(exeString));
                exit(EXIT_FAILURE);
            }
            bdestroy(exeString);

            if (optClock)
            {
                perfmon_stopCounters();
                perfmon_printCounterResults();
                perfmon_finalize();
            }
            else
            {
                timer_stop(&time);

                for (int i=0; i<numSockets; i++)
                {
                    int cpuId = numa_info.nodes[threadsSockets[i]].processors[0];
                    power_stop(pDataPkg+i, cpuId, PKG);
                    if (hasDRAM) power_stop(pDataDram+i, cpuId, DRAM);
                }
                runtime = timer_print(&time);
            }
        }

        if (!optClock)
        {
            printf("Runtime: %g second \n",runtime);
            printf(HLINE);
            for (int i=0; i<numSockets; i++)
            {
                printf("Socket %d\n",threadsSockets[i]);
                printf("Domain: PKG \n");
                printf("Energy consumed: %g Joules \n", power_printEnergy(pDataPkg+i));
                printf("Power consumed: %g Watts \n", power_printEnergy(pDataPkg+i) / runtime );
                if (hasDRAM)
                {
                    printf("Domain: DRAM \n");
                    printf("Energy consumed: %g Joules \n", power_printEnergy(pDataDram+i));
                    printf("Power consumed: %g Watts \n", power_printEnergy(pDataDram+i) / runtime );
                }
                printf("\n");
            }
        }
    }

#if 0
    if ( cpuid_hasFeature(TM2) )
    {
        thermal_init(0);
        printf("Current core temperatures:\n");

        for (uint32_t i = 0; i < cpuid_topology.numCoresPerSocket; i++ )
        {
            printf("Core %d: %u C\n",
                    numa_info.nodes[socketId].processors[i],
                    thermal_read(numa_info.nodes[socketId].processors[i]));
        }
    }
#endif

    msr_finalize();
    return EXIT_SUCCESS;
}