Exemple #1
0
/// @brief Provide access to the per-thread APERF, MPERF and TSC batch storage.
///
/// The first call sizes three pointer tables from num_devs(), allocates the
/// CLOCKS_DATA batch with room for three entries per device, and loads the
/// three MSRs into that batch. Later calls only hand out the cached tables.
///
/// @param [out] aperf_val If non-NULL, receives the APERF pointer table.
///
/// @param [out] mperf_val If non-NULL, receives the MPERF pointer table.
///
/// @param [out] tsc_val If non-NULL, receives the TSC pointer table.
///
/// @return Always 0.
int clocks_storage(uint64_t ***aperf_val, uint64_t ***mperf_val, uint64_t ***tsc_val)
{
    static int needs_setup = 1;
    static uint64_t **aperf_slots = NULL;
    static uint64_t **mperf_slots = NULL;
    static uint64_t **tsc_slots = NULL;
    static uint64_t totalThreads = 0;

    if (needs_setup)
    {
        /* One pointer slot per device for each of the three counters. */
        totalThreads = num_devs();
        aperf_slots = (uint64_t **) libmsr_malloc(totalThreads * sizeof(uint64_t *));
        mperf_slots = (uint64_t **) libmsr_malloc(totalThreads * sizeof(uint64_t *));
        tsc_slots = (uint64_t **) libmsr_malloc(totalThreads * sizeof(uint64_t *));

        /* A single batch holds all three counters for every device. */
        allocate_batch(CLOCKS_DATA, 3UL * num_devs());
        load_thread_batch(MSR_IA32_APERF, aperf_slots, CLOCKS_DATA);
        load_thread_batch(MSR_IA32_MPERF, mperf_slots, CLOCKS_DATA);
        load_thread_batch(IA32_TIME_STAMP_COUNTER, tsc_slots, CLOCKS_DATA);

        needs_setup = 0;
    }

    /* Each output is optional; callers pass NULL for tables they do not need. */
    if (aperf_val != NULL)
    {
        *aperf_val = aperf_slots;
    }
    if (mperf_val != NULL)
    {
        *mperf_val = mperf_slots;
    }
    if (tsc_val != NULL)
    {
        *tsc_val = tsc_slots;
    }

    return 0;
}
Exemple #2
0
/*
 * Create a new work queue.
 *
 * Both the item list and the free list start out empty, after which
 * allocate_batch() is invoked once on the new queue.
 *
 * Returns the new queue, or a null pointer if NEW() yields one.
 */
work_queue_t *work_queue(void)
{
    work_queue_t *queue = NEW(work_queue_t);

    if (queue) {
        DEQ_INIT(queue->items);
        DEQ_INIT(queue->free_list);

        allocate_batch(queue);
    }

    /* On allocation failure this is the null pointer returned by NEW(). */
    return queue;
}
Exemple #3
0
/*
 * Append a connector to the tail of the work queue.
 *
 * A work item is taken from the head of the queue's free list; when that
 * list is empty, allocate_batch() is called once to refill it. If the free
 * list is still empty afterwards, or if w is null, the call returns without
 * queuing anything.
 */
void work_queue_put(work_queue_t *w, pn_connector_t *conn)
{
    work_item_t *entry;

    if (!w)
        return;

    if (DEQ_SIZE(w->free_list) == 0) {
        allocate_batch(w);

        /* Refill produced nothing: give up on this request. */
        if (DEQ_SIZE(w->free_list) == 0)
            return;
    }

    /* Move one item from the free list to the tail of the pending items. */
    entry = DEQ_HEAD(w->free_list);
    DEQ_REMOVE_HEAD(w->free_list);
    entry->conn = conn;
    DEQ_INSERT_TAIL(w->items, entry);
}
Exemple #4
0
/*
 * Allocate `size` bytes of frame memory on behalf of worker `w`.
 *
 * Requests with no worker, or larger than FRAME_MALLOC_MAX_SIZE, are passed
 * straight to __cilkrts_malloc(). All other requests are served from the
 * worker's per-bucket free list, which is refilled through allocate_batch()
 * whenever a pop comes back empty.
 *
 * NOTE(review): only the beginning of this function is visible in this
 * excerpt; what happens to `mem` after the loop is not shown here.
 */
void *__cilkrts_frame_malloc(__cilkrts_worker *w, size_t size)
{
    int bucket;
    void *mem;

    /* if too large, or if no worker, fall back to __cilkrts_malloc()  */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_ALLOC_LARGE);
        return __cilkrts_malloc(size);
    }

    START_INTERVAL(w, INTERVAL_FRAME_ALLOC); {
        /* Map the request to a bucket, then replace the requested size with
           the size associated with that bucket. */
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);

        /* Keep refilling until the worker's free list for this bucket
           yields a non-null block. */
        while (!(mem = pop(&w->l->free_list[bucket]))) {
            /* get a batch of frames from the global pool */
            START_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL) {
                allocate_batch(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC);
Exemple #5
0
/// @brief Allocate RAPL data for batch operations.
///
/// Allocates the RAPL_DATA batch (rapl_data_batch_size() entries per socket)
/// and, for every status MSR enabled in the flags, allocates the matching
/// per-socket arrays in @p rapl and loads that MSR into the batch.
///
/// @param [in] rapl_flags Platform-specific bit flags indicating availability
///        of RAPL MSRs.
///
/// @param [out] rapl Measurements of energy, time, and power data from a given
///        RAPL power domain. The per-socket array members for each enabled
///        domain are allocated here.
static void create_rapl_data_batch(uint64_t *rapl_flags, struct rapl_data *rapl)
{
    uint64_t sockets = num_sockets();

    allocate_batch(RAPL_DATA, rapl_data_batch_size(rapl_flags) * sockets);
    if (*rapl_flags & PKG_ENERGY_STATUS)
    {
        rapl->pkg_bits = (uint64_t **) libmsr_calloc(sockets, sizeof(uint64_t *));
        rapl->pkg_joules = (double *) libmsr_calloc(sockets, sizeof(double));
        rapl->old_pkg_bits = (uint64_t *) libmsr_calloc(sockets, sizeof(uint64_t));
        rapl->old_pkg_joules = (double *) libmsr_calloc(sockets, sizeof(double));
        rapl->pkg_delta_joules = (double *) libmsr_calloc(sockets, sizeof(double));
        rapl->pkg_watts = (double *) libmsr_calloc(sockets, sizeof(double));
        load_socket_batch(MSR_PKG_ENERGY_STATUS, rapl->pkg_bits, RAPL_DATA);
    }
    if (*rapl_flags & PKG_PERF_STATUS)
    {
        // Array of pointers into the batch: the element type is uint64_t *,
        // matching pkg_bits and dram_bits.
        rapl->pkg_perf_count = (uint64_t **) libmsr_calloc(sockets, sizeof(uint64_t *));
        load_socket_batch(MSR_PKG_PERF_STATUS, rapl->pkg_perf_count, RAPL_DATA);
    }
    if (*rapl_flags & DRAM_ENERGY_STATUS)
    {
        rapl->dram_bits = (uint64_t **) libmsr_calloc(sockets, sizeof(uint64_t *));
        rapl->old_dram_bits = (uint64_t *) libmsr_calloc(sockets, sizeof(uint64_t));
        rapl->dram_joules = (double *) libmsr_calloc(sockets, sizeof(double));
        rapl->old_dram_joules = (double *) libmsr_calloc(sockets, sizeof(double));
        rapl->dram_delta_joules = (double *) libmsr_calloc(sockets, sizeof(double));
        rapl->dram_watts = (double *) libmsr_calloc(sockets, sizeof(double));
        load_socket_batch(MSR_DRAM_ENERGY_STATUS, rapl->dram_bits, RAPL_DATA);
    }
    if (*rapl_flags & DRAM_PERF_STATUS)
    {
        // Array of pointers into the batch: the element type is uint64_t *.
        rapl->dram_perf_count = (uint64_t **) libmsr_calloc(sockets, sizeof(uint64_t *));
        load_socket_batch(MSR_DRAM_PERF_STATUS, rapl->dram_perf_count, RAPL_DATA);
    }
}
Exemple #6
0
void get_rapl_power_unit(struct rapl_units *ru)
{
    static int init = 0;
    static uint64_t sockets = 0;
    static uint64_t **val = NULL;
    int i;

    sockets = num_sockets();
    if (!init)
    {
        init = 1;
        val = (uint64_t **) libmsr_calloc(sockets, sizeof(uint64_t *));
        allocate_batch(RAPL_UNIT, sockets);
        load_socket_batch(MSR_RAPL_POWER_UNIT, val, RAPL_UNIT);
    }
    read_batch(RAPL_UNIT);
    /* Initialize the units used for each socket. */
    for (i = 0; i < sockets; i++)
    {
        // See figure 14-16 for bit fields.
        //  1  1 1  1 1
        //  9  6 5  2 1  8 7  4 3  0
        //
        //  1010 0001 0000 0000 0011
        //
        //     A    1    0    0    3
        //ru[i].msr_rapl_power_unit = 0xA1003;

        ru[i].msr_rapl_power_unit = *val[i];
        /* Default is 1010b or 976 microseconds. */
        /* Storing (1/(2^TU))^-1 for maximum precision. */
        ru[i].seconds = (double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 19, 16)));
        /* Default is 10000b or 15.3 microjoules. */
        /* Storing (1/(2^ESU))^-1 for maximum precision. */
        ru[i].joules = (double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 12, 8)));
#ifdef LIBMSR_DEBUG
        fprintf(stderr, "DEBUG: joules unit is %f register has %lx\n", ru[i].joules, ru[i].msr_rapl_power_unit);
#endif
        /* Default is 0011b or 1/8 Watts. */
        ru[i].watts = ((1.0)/((double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 3, 0)))));
#ifdef LIBMSR_DEBUG
        fprintf(stdout, "Pkg %d MSR_RAPL_POWER_UNIT\n", i);
        fprintf(stdout, "Raw: %f sec, %f J, %f watts\n", ru[i].seconds, ru[i].joules, ru[i].watts);
        fprintf(stdout, "Adjusted: %f sec, %f J, %f watts\n", 1/ru[i].seconds, 1/ru[i].joules, ru[i].watts);
#endif
    }

    /* Check consistency between packages. */
    uint64_t *tmp = (uint64_t *) libmsr_calloc(sockets, sizeof(uint64_t));
    for (i = 0; i < sockets; i++)
    {
        read_msr_by_coord(i, 0, 0, MSR_RAPL_POWER_UNIT, tmp);
        double energy = (double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 12, 8)));
        double seconds = (double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 19, 16)));
        double power = ((1.0)/((double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 3, 0)))));
        if (energy != ru[i].joules || power != ru[i].watts || seconds != ru[i].seconds)
        {
            libmsr_error_handler("get_rapl_power_unit(): Inconsistent rapl power units across packages", LIBMSR_ERROR_RUNTIME, getenv("HOSTNAME"), __FILE__, __LINE__);
        }
    }
}