void _starpu_worker_stop_sleeping(int workerid) { if (starpu_profiling_status_get()) { struct timespec *sleeping_start, sleep_end_time; _starpu_clock_gettime(&sleep_end_time); STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]); sleeping_start = &sleeping_start_date[workerid]; /* Perhaps that profiling was enabled while the worker was * already blocked, so we don't measure (end - start), but * (end - max(start,worker_start)) where worker_start is the * date of the previous profiling info reset on the worker */ struct timespec *worker_start = &worker_info[workerid].start_time; if (starpu_timespec_cmp(sleeping_start, worker_start, <)) { /* sleeping_start < worker_start */ sleeping_start = worker_start; } struct timespec sleeping_time; starpu_timespec_sub(&sleep_end_time, sleeping_start, &sleeping_time); starpu_timespec_accumulate(&worker_info[workerid].sleeping_time, &sleeping_time); worker_registered_sleeping_start[workerid] = 0; STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]); } }
/* Stamp the "push end" time on TASK's profiling record, if profiling is
 * enabled and the task carries a profiling_info structure. */
void _starpu_profiling_set_task_push_end_time(struct starpu_task *task)
{
	if (!starpu_profiling_status_get())
		return;

	struct starpu_profiling_task_info *info = task->profiling_info;
	if (info)
		_starpu_clock_gettime(&info->push_end_time);
}
static void _starpu_worker_reset_profiling_info_with_lock(int workerid) { _starpu_clock_gettime(&worker_info[workerid].start_time); /* This is computed in a lazy fashion when the application queries * profiling info. */ starpu_timespec_clear(&worker_info[workerid].total_time); starpu_timespec_clear(&worker_info[workerid].executing_time); starpu_timespec_clear(&worker_info[workerid].sleeping_time); worker_info[workerid].executed_tasks = 0; worker_info[workerid].used_cycles = 0; worker_info[workerid].stall_cycles = 0; worker_info[workerid].power_consumed = 0; /* We detect if the worker is already sleeping or doing some * computation */ enum _starpu_worker_status status = _starpu_worker_get_status(workerid); if (status == STATUS_SLEEPING) { worker_registered_sleeping_start[workerid] = 1; _starpu_clock_gettime(&sleeping_start_date[workerid]); } else { worker_registered_sleeping_start[workerid] = 0; } if (status == STATUS_EXECUTING) { worker_registered_executing_start[workerid] = 1; _starpu_clock_gettime(&executing_start_date[workerid]); } else { worker_registered_executing_start[workerid] = 0; } }
/* Mark WORKERID as entering a sleeping period, recording the start
 * timestamp for later accounting. No-op when profiling is disabled. */
void _starpu_worker_restart_sleeping(int workerid)
{
	if (!starpu_profiling_status_get())
		return;

	/* Take the timestamp before acquiring the lock, so the wait on the
	 * mutex is not counted against the sleeping period. */
	struct timespec sleep_start_time;
	_starpu_clock_gettime(&sleep_start_time);

	STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
	worker_registered_sleeping_start[workerid] = 1;
	/* Plain struct assignment, equivalent to the previous memcpy. */
	sleeping_start_date[workerid] = sleep_start_time;
	STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
}
int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *info) { if (!starpu_profiling_status_get()) { /* Not thread safe, shouldn't be too much a problem */ info->executed_tasks = worker_info[workerid].executed_tasks; } STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]); if (info) { /* The total time is computed in a lazy fashion */ struct timespec now; _starpu_clock_gettime(&now); /* In case some worker is currently sleeping, we take into * account the time spent since it registered. */ if (worker_registered_sleeping_start[workerid]) { struct timespec sleeping_time; starpu_timespec_sub(&now, &sleeping_start_date[workerid], &sleeping_time); starpu_timespec_accumulate(&worker_info[workerid].sleeping_time, &sleeping_time); } if (worker_registered_executing_start[workerid]) { struct timespec executing_time; starpu_timespec_sub(&now, &executing_start_date[workerid], &executing_time); starpu_timespec_accumulate(&worker_info[workerid].executing_time, &executing_time); } /* total_time = now - start_time */ starpu_timespec_sub(&now, &worker_info[workerid].start_time, &worker_info[workerid].total_time); memcpy(info, &worker_info[workerid], sizeof(struct starpu_profiling_worker_info)); } _starpu_worker_reset_profiling_info_with_lock(workerid); STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]); return 0; }
int starpu_bus_get_profiling_info(int busid, struct starpu_profiling_bus_info *bus_info) { int src_node = busid_to_node_pair[busid].src; int dst_node = busid_to_node_pair[busid].dst; /* XXX protect all this method with a mutex */ if (bus_info) { struct timespec now; _starpu_clock_gettime(&now); /* total_time = now - start_time */ starpu_timespec_sub(&now, &bus_profiling_info[src_node][dst_node].start_time, &bus_profiling_info[src_node][dst_node].total_time); memcpy(bus_info, &bus_profiling_info[src_node][dst_node], sizeof(struct starpu_profiling_bus_info)); } _starpu_bus_reset_profiling_info(&bus_profiling_info[src_node][dst_node]); return 0; }
/* Zero the transfer counters of *BUS_INFO and restart its measurement
 * window from the current time. */
static void _starpu_bus_reset_profiling_info(struct starpu_profiling_bus_info *bus_info)
{
	_starpu_clock_gettime(&bus_info->start_time);
	bus_info->transfer_count = 0;
	bus_info->transferred_bytes = 0;
}
/* Pop the next task for WORKER: first from its local queue, then from the
 * scheduling policies of the contexts it belongs to. Returns NULL when no
 * task is available. May loop back (goto pick) after submitting
 * multiformat-conversion tasks. NOTE(review): appears to be called with
 * worker->sched_mutex held (it is unlocked/relocked around conversion-task
 * submission below) — confirm against callers. */
struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
{
	struct starpu_task *task;
	int worker_id;
	unsigned node;

	/* We can't tell in advance which task will be picked up, so we measure
	 * a timestamp, and will attribute it afterwards to the task. */
	int profiling = starpu_profiling_status_get();
	struct timespec pop_start_time;
	if (profiling)
		_starpu_clock_gettime(&pop_start_time);

pick:
	/* perhaps there is some local task to be executed first */
	task = _starpu_pop_local_task(worker);

	/* get tasks from the stacks of the strategy */
	if(!task)
	{
		struct _starpu_sched_ctx *sched_ctx ;
#ifndef STARPU_NON_BLOCKING_DRIVERS
		/* Blocking drivers: keep polling the worker's contexts until a
		 * task shows up, but visit each context at most twice
		 * (been_here) before giving up. */
		int been_here[STARPU_NMAX_SCHED_CTXS];
		int i;
		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
			been_here[i] = 0;

		while(!task)
#endif
		{
			if(worker->nsched_ctxs == 1)
				sched_ctx = _starpu_get_initial_sched_ctx();
			else
			{
				/* Pick the next context to pop from, skipping (and
				 * leaving) contexts this worker was removed from. */
				while(1)
				{
					sched_ctx = _get_next_sched_ctx_to_pop_into(worker);

					if(worker->removed_from_ctx[sched_ctx->id] == 1 && worker->shares_tasks_lists[sched_ctx->id] == 1)
					{
						_starpu_worker_gets_out_of_ctx(sched_ctx->id, worker);
						worker->removed_from_ctx[sched_ctx->id] = 0;
						sched_ctx = NULL;
					}
					else
						break;
				}
			}

			if(sched_ctx && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
			{
				if (sched_ctx->sched_policy && sched_ctx->sched_policy->pop_task)
				{
					task = sched_ctx->sched_policy->pop_task(sched_ctx->id);
					_starpu_pop_task_end(task);
				}
			}

			if(!task)
			{
				/* it doesn't matter if it shares tasks list or not in the scheduler,
				 * if it does not have any task to pop just get it out of here */
				/* however if it shares a task list it will be removed as soon as he
				 * finishes this job (in handle_job_termination) */
				if(worker->removed_from_ctx[sched_ctx->id])
				{
					_starpu_worker_gets_out_of_ctx(sched_ctx->id, worker);
					worker->removed_from_ctx[sched_ctx->id] = 0;
				}
#ifdef STARPU_USE_SC_HYPERVISOR
				/* Tell the hypervisor this worker spun an idle cycle
				 * in this context. */
				if(worker->pop_ctx_priority)
				{
					struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;
					if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_cycle && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id))
					{
//						_STARPU_TRACE_HYPERVISOR_BEGIN();
						perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0);
//						_STARPU_TRACE_HYPERVISOR_END();
					}
				}
#endif //STARPU_USE_SC_HYPERVISOR

#ifndef STARPU_NON_BLOCKING_DRIVERS
				/* Already visited this context once with no task, or
				 * there is only one context: stop polling. */
				if(been_here[sched_ctx->id] || worker->nsched_ctxs == 1)
					break;

				been_here[sched_ctx->id] = 1;
#endif
			}
		}
	}

	if (!task)
	{
		/* Start (or continue) an idle period for this worker. */
		idle_start[worker->workerid] = starpu_timing_now();
		return NULL;
	}

	/* A task was found: close any open idle period and account it. */
	if(idle_start[worker->workerid] != 0.0)
	{
		double idle_end = starpu_timing_now();
		idle[worker->workerid] += (idle_end - idle_start[worker->workerid]);
		idle_start[worker->workerid] = 0.0;
	}

#ifdef STARPU_USE_SC_HYPERVISOR
	/* Notify the hypervisor that a task was successfully popped. */
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
	struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;
	if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_poped_task && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id))
	{
//		_STARPU_TRACE_HYPERVISOR_BEGIN();
		perf_counters->notify_poped_task(task->sched_ctx, worker->workerid);
//		_STARPU_TRACE_HYPERVISOR_END();
	}
#endif //STARPU_USE_SC_HYPERVISOR

	/* Make sure we do not bother with all the multiformat-specific code if
	 * it is not necessary. */
	if (!_starpu_task_uses_multiformat_handles(task))
		goto profiling;

	/* This is either a conversion task, or a regular task for which the
	 * conversion tasks have already been created and submitted */
	if (task->mf_skip)
		goto profiling;

	/*
	 * This worker may not be able to execute this task. In this case, we
	 * should return the task anyway. It will be pushed back almost immediatly.
	 * This way, we avoid computing and executing the conversions tasks.
	 * Here, we do not care about what implementation is used.
	 */
	worker_id = starpu_worker_get_id();
	if (!starpu_worker_can_execute_task_first_impl(worker_id, task, NULL))
		return task;

	node = starpu_worker_get_memory_node(worker_id);

	/*
	 * We do have a task that uses multiformat handles. Let's create the
	 * required conversion tasks.
	 */
	/* Drop the scheduling lock while submitting conversion tasks;
	 * re-taken before looping back to pick. */
	STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
	unsigned i;
	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
	for (i = 0; i < nbuffers; i++)
	{
		struct starpu_task *conversion_task;
		starpu_data_handle_t handle;
		handle = STARPU_TASK_GET_HANDLE(task, i);
		if (!_starpu_handle_needs_conversion_task(handle, node))
			continue;
		conversion_task = _starpu_create_conversion_task(handle, node);
		conversion_task->mf_skip = 1;
		conversion_task->execute_on_a_specific_worker = 1;
		conversion_task->workerid = worker_id;
		/*
		 * Next tasks will need to know where these handles have gone.
		 */
		handle->mf_node = node;
		_starpu_task_submit_conversion_task(conversion_task, worker_id);
	}
	/* Re-queue the original task locally; it will be popped again once
	 * the conversion tasks are in flight, with mf_skip set. */
	task->mf_skip = 1;
	starpu_task_list_push_back(&worker->local_tasks, task);
	STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
	goto pick;

profiling:
	if (profiling)
	{
		struct starpu_profiling_task_info *profiling_info;
		profiling_info = task->profiling_info;

		/* The task may have been created before profiling was enabled,
		 * so we check if the profiling_info structure is available
		 * even though we already tested if profiling is enabled. */
		if (profiling_info)
		{
			memcpy(&profiling_info->pop_start_time,
			       &pop_start_time, sizeof(struct timespec));
			_starpu_clock_gettime(&profiling_info->pop_end_time);
		}
	}

	if(task->prologue_callback_pop_func)
		task->prologue_callback_pop_func(task->prologue_callback_pop_arg);

	return task;
}