int clocks_storage(uint64_t *** aperf_val, uint64_t *** mperf_val, uint64_t *** tsc_val) { static int init = 1; static uint64_t ** aperf = NULL, ** mperf = NULL, ** tsc = NULL; static uint64_t totalThreads = 0; if (init) { totalThreads = num_devs(); aperf = (uint64_t **) libmsr_malloc(totalThreads * sizeof(uint64_t *)); mperf = (uint64_t **) libmsr_malloc(totalThreads * sizeof(uint64_t *)); tsc = (uint64_t **) libmsr_malloc(totalThreads * sizeof(uint64_t *)); allocate_batch(CLOCKS_DATA, 3UL * num_devs()); load_thread_batch(MSR_IA32_APERF, aperf, CLOCKS_DATA); load_thread_batch(MSR_IA32_MPERF, mperf, CLOCKS_DATA); load_thread_batch(IA32_TIME_STAMP_COUNTER, tsc, CLOCKS_DATA); init = 0; } if (aperf_val) { *aperf_val = aperf; } if (mperf_val) { *mperf_val = mperf; } if (tsc_val) { *tsc_val = tsc; } return 0; }
/**
 * Create a work queue.
 *
 * Both the list of queued items and the free list start out empty; one batch
 * of items is then requested via allocate_batch() before the queue is handed
 * back.
 *
 * @return The new queue, or a null pointer if NEW() produced none.
 */
work_queue_t *work_queue(void)
{
    work_queue_t *queue = NEW(work_queue_t);

    if (queue) {
        DEQ_INIT(queue->items);
        DEQ_INIT(queue->free_list);
        allocate_batch(queue);
    }

    return queue;
}
/**
 * Append a connector to the tail of the work queue.
 *
 * An item is taken from the head of the queue's free list; if the free list
 * is empty, one allocate_batch() call is made to refill it first. The
 * connector is silently dropped when the queue is null or the free list is
 * still empty after the refill attempt.
 *
 * @param w     Queue to append to; a null pointer is ignored.
 * @param conn  Connector stored in the queued item.
 */
void work_queue_put(work_queue_t *w, pn_connector_t *conn)
{
    work_item_t *slot;

    if (!w) {
        return;
    }

    /* Try to refill once when no free items are left. */
    if (DEQ_SIZE(w->free_list) == 0) {
        allocate_batch(w);
    }

    /* Still nothing available: give up without queueing. */
    if (DEQ_SIZE(w->free_list) == 0) {
        return;
    }

    slot = DEQ_HEAD(w->free_list);
    DEQ_REMOVE_HEAD(w->free_list);

    slot->conn = conn;
    DEQ_INSERT_TAIL(w->items, slot);
}
void *__cilkrts_frame_malloc(__cilkrts_worker *w, size_t size) { int bucket; void *mem; /* if too large, or if no worker, fall back to __cilkrts_malloc() */ if (!w || size > FRAME_MALLOC_MAX_SIZE) { NOTE_INTERVAL(w, INTERVAL_FRAME_ALLOC_LARGE); return __cilkrts_malloc(size); } START_INTERVAL(w, INTERVAL_FRAME_ALLOC); { bucket = bucket_of_size(size); size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket); while (!(mem = pop(&w->l->free_list[bucket]))) { /* get a batch of frames from the global pool */ START_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL) { allocate_batch(w, bucket, size); } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL); } } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC);
/// @brief Allocate RAPL data for batch operations. /// /// @param [in] rapl_flags Platform-specific bit flags indicating availability /// of RAPL MSRs. /// /// @param [in] rapl Measurements of energy, time, and power data from a given /// RAPL power domain. static void create_rapl_data_batch(uint64_t *rapl_flags, struct rapl_data *rapl) { uint64_t sockets = num_sockets(); allocate_batch(RAPL_DATA, rapl_data_batch_size(rapl_flags) * sockets); if (*rapl_flags & PKG_ENERGY_STATUS) { rapl->pkg_bits = (uint64_t **) libmsr_calloc(sockets, sizeof(uint64_t *)); rapl->pkg_joules = (double *) libmsr_calloc(sockets, sizeof(double)); rapl->old_pkg_bits = (uint64_t *) libmsr_calloc(sockets, sizeof(uint64_t)); rapl->old_pkg_joules = (double *) libmsr_calloc(sockets, sizeof(double)); rapl->pkg_delta_joules = (double *) libmsr_calloc(sockets, sizeof(double)); rapl->pkg_watts = (double *) libmsr_calloc(sockets, sizeof(double)); load_socket_batch(MSR_PKG_ENERGY_STATUS, rapl->pkg_bits, RAPL_DATA); } if (*rapl_flags & PKG_PERF_STATUS) { rapl->pkg_perf_count = (uint64_t **) libmsr_calloc(sockets, sizeof(uint64_t)); load_socket_batch(MSR_PKG_PERF_STATUS, rapl->pkg_perf_count, RAPL_DATA); } if (*rapl_flags & DRAM_ENERGY_STATUS) { rapl->dram_bits = (uint64_t **) libmsr_calloc(sockets, sizeof(uint64_t *)); rapl->old_dram_bits = (uint64_t *) libmsr_calloc(sockets, sizeof(uint64_t)); rapl->dram_joules = (double *) libmsr_calloc(sockets, sizeof(double)); rapl->old_dram_joules = (double *) libmsr_calloc(sockets, sizeof(double)); rapl->dram_delta_joules = (double *) libmsr_calloc(sockets, sizeof(double)); rapl->dram_watts = (double *) libmsr_calloc(sockets, sizeof(double)); load_socket_batch(MSR_DRAM_ENERGY_STATUS, rapl->dram_bits, RAPL_DATA); } if (*rapl_flags & DRAM_PERF_STATUS) { rapl->dram_perf_count = (uint64_t **) libmsr_calloc(sockets, sizeof(uint64_t)); load_socket_batch(MSR_DRAM_PERF_STATUS, rapl->dram_perf_count, RAPL_DATA); } }
void get_rapl_power_unit(struct rapl_units *ru) { static int init = 0; static uint64_t sockets = 0; static uint64_t **val = NULL; int i; sockets = num_sockets(); if (!init) { init = 1; val = (uint64_t **) libmsr_calloc(sockets, sizeof(uint64_t *)); allocate_batch(RAPL_UNIT, sockets); load_socket_batch(MSR_RAPL_POWER_UNIT, val, RAPL_UNIT); } read_batch(RAPL_UNIT); /* Initialize the units used for each socket. */ for (i = 0; i < sockets; i++) { // See figure 14-16 for bit fields. // 1 1 1 1 1 // 9 6 5 2 1 8 7 4 3 0 // // 1010 0001 0000 0000 0011 // // A 1 0 0 3 //ru[i].msr_rapl_power_unit = 0xA1003; ru[i].msr_rapl_power_unit = *val[i]; /* Default is 1010b or 976 microseconds. */ /* Storing (1/(2^TU))^-1 for maximum precision. */ ru[i].seconds = (double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 19, 16))); /* Default is 10000b or 15.3 microjoules. */ /* Storing (1/(2^ESU))^-1 for maximum precision. */ ru[i].joules = (double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 12, 8))); #ifdef LIBMSR_DEBUG fprintf(stderr, "DEBUG: joules unit is %f register has %lx\n", ru[i].joules, ru[i].msr_rapl_power_unit); #endif /* Default is 0011b or 1/8 Watts. */ ru[i].watts = ((1.0)/((double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 3, 0))))); #ifdef LIBMSR_DEBUG fprintf(stdout, "Pkg %d MSR_RAPL_POWER_UNIT\n", i); fprintf(stdout, "Raw: %f sec, %f J, %f watts\n", ru[i].seconds, ru[i].joules, ru[i].watts); fprintf(stdout, "Adjusted: %f sec, %f J, %f watts\n", 1/ru[i].seconds, 1/ru[i].joules, ru[i].watts); #endif } /* Check consistency between packages. 
*/ uint64_t *tmp = (uint64_t *) libmsr_calloc(sockets, sizeof(uint64_t)); for (i = 0; i < sockets; i++) { read_msr_by_coord(i, 0, 0, MSR_RAPL_POWER_UNIT, tmp); double energy = (double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 12, 8))); double seconds = (double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 19, 16))); double power = ((1.0)/((double)(1 << (MASK_VAL(ru[i].msr_rapl_power_unit, 3, 0))))); if (energy != ru[i].joules || power != ru[i].watts || seconds != ru[i].seconds) { libmsr_error_handler("get_rapl_power_unit(): Inconsistent rapl power units across packages", LIBMSR_ERROR_RUNTIME, getenv("HOSTNAME"), __FILE__, __LINE__); } } }