int starpu_opencl_collect_stats(cl_event event STARPU_ATTRIBUTE_UNUSED)
{
#if defined(CL_PROFILING_CLOCK_CYCLE_COUNT)||defined(CL_PROFILING_STALL_CYCLE_COUNT)||defined(CL_PROFILING_POWER_CONSUMED)
	struct starpu_task *task = starpu_task_get_current();
	struct starpu_profiling_task_info *info = task->profiling_info;
#endif

#ifdef CL_PROFILING_CLOCK_CYCLE_COUNT
	if (starpu_profiling_status_get() && info)
	{
		cl_int err;
		unsigned int clock_cycle_count;
		size_t size;
		err = clGetEventProfilingInfo(event, CL_PROFILING_CLOCK_CYCLE_COUNT, sizeof(clock_cycle_count), &clock_cycle_count, &size);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
		STARPU_ASSERT(size == sizeof(clock_cycle_count));
		info->used_cycles += clock_cycle_count;
	}
#endif
#ifdef CL_PROFILING_STALL_CYCLE_COUNT
	if (starpu_profiling_status_get() && info)
	{
		cl_int err;
		unsigned int stall_cycle_count;
		size_t size;
		err = clGetEventProfilingInfo(event, CL_PROFILING_STALL_CYCLE_COUNT, sizeof(stall_cycle_count), &stall_cycle_count, &size);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
		STARPU_ASSERT(size == sizeof(stall_cycle_count));

		info->stall_cycles += stall_cycle_count;
	}
#endif
#ifdef CL_PROFILING_POWER_CONSUMED
	if (info && (starpu_profiling_status_get() || (task->cl && task->cl->power_model && task->cl->power_model->benchmarking)))
	{
		cl_int err;
		double power_consumed;
		size_t size;
		err = clGetEventProfilingInfo(event, CL_PROFILING_POWER_CONSUMED, sizeof(power_consumed), &power_consumed, &size);
		if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
		STARPU_ASSERT(size == sizeof(power_consumed));

		info->power_consumed += power_consumed;
	}
#endif

	return 0;
}
Пример #2
0
void starpu_profiling_worker_helper_display_summary(void)
{
	const char *stats;
	double sum_consumed = 0.;
	int profiling = starpu_profiling_status_get();
	double overall_time = 0;
	int workerid;
	int worker_cnt = starpu_worker_get_count();

	if (!((stats = getenv("STARPU_WORKER_STATS")) && atoi(stats))) return;

	fprintf(stderr, "\nWorker statistics:\n");
	fprintf(stderr,   "******************\n");

	for (workerid = 0; workerid < worker_cnt; workerid++)
	{
		struct starpu_profiling_worker_info info;
		starpu_profiling_worker_get_info(workerid, &info);
		char name[64];

		starpu_worker_get_name(workerid, name, sizeof(name));

		if (profiling)
		{
			double total_time = starpu_timing_timespec_to_us(&info.total_time) / 1000.;
			double executing_time = starpu_timing_timespec_to_us(&info.executing_time) / 1000.;
			double sleeping_time = starpu_timing_timespec_to_us(&info.sleeping_time) / 1000.;
			if (total_time > overall_time)
				overall_time = total_time;

			fprintf(stderr, "%-32s\n", name);
			fprintf(stderr, "\t%d task(s)\n\ttotal: %.2lf ms executing: %.2lf ms sleeping: %.2lf ms overhead %.2lf ms\n", info.executed_tasks, total_time, executing_time, sleeping_time, total_time - executing_time - sleeping_time);
			if (info.used_cycles || info.stall_cycles)
				fprintf(stderr, "\t%lu Mcy %lu Mcy stall\n", info.used_cycles/1000000, info.stall_cycles/1000000);
			if (info.power_consumed)
				fprintf(stderr, "\t%f J consumed\n", info.power_consumed);
		}
		else
		{
			fprintf(stderr, "\t%-32s\t%d task(s)\n", name, info.executed_tasks);
		}

		sum_consumed += info.power_consumed;
	}

	if (profiling)
	{
		const char *strval_idle_power = getenv("STARPU_IDLE_POWER");
		if (strval_idle_power)
		{
			double idle_power = atof(strval_idle_power); /* Watt */
			double idle_consumption = idle_power * overall_time / 1000.; /* J */

			fprintf(stderr, "Idle consumption: %.2lf J\n", idle_consumption);
			sum_consumed += idle_consumption;
		}
	}
	if (profiling && sum_consumed)
		fprintf(stderr, "Total consumption: %.2lf J\n", sum_consumed);
}
Пример #3
0
void _starpu_worker_register_executing_start_date(int workerid, struct timespec *executing_start)
{
	if (starpu_profiling_status_get())
	{
		STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
		worker_registered_executing_start[workerid] = 1;
		memcpy(&executing_start_date[workerid], executing_start, sizeof(struct timespec));
		STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
	}
}
Пример #4
0
void _starpu_profiling_set_task_push_end_time(struct starpu_task *task)
{
	if (!starpu_profiling_status_get())
		return;

	struct starpu_profiling_task_info *profiling_info;
	profiling_info = task->profiling_info;

	if (profiling_info)
		_starpu_clock_gettime(&profiling_info->push_end_time);
}
Пример #5
0
void _starpu_worker_restart_sleeping(int workerid)
{
	if (starpu_profiling_status_get())
	{
		struct timespec sleep_start_time;
		_starpu_clock_gettime(&sleep_start_time);

		STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);
		worker_registered_sleeping_start[workerid] = 1;
		memcpy(&sleeping_start_date[workerid], &sleep_start_time, sizeof(struct timespec));
		STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
	}
}
Пример #6
0
struct starpu_profiling_task_info *_starpu_allocate_profiling_info_if_needed(struct starpu_task *task)
{
	struct starpu_profiling_task_info *info = NULL;

	/* If we are benchmarking, we need room for the power consumption */
	if (starpu_profiling_status_get() || (task->cl && task->cl->power_model && (task->cl->power_model->benchmarking || _starpu_get_calibrate_flag())))
	{
		info = (struct starpu_profiling_task_info *) calloc(1, sizeof(struct starpu_profiling_task_info));
		STARPU_ASSERT(info);
	}

	return info;
}
Пример #7
0
int starpu_profiling_worker_get_info(int workerid, struct starpu_profiling_worker_info *info)
{
	if (!starpu_profiling_status_get())
	{
		/* Not thread safe, shouldn't be too much a problem */
		info->executed_tasks = worker_info[workerid].executed_tasks;
	}

	STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);

	if (info)
	{
		/* The total time is computed in a lazy fashion */
		struct timespec now;
		_starpu_clock_gettime(&now);

		/* In case some worker is currently sleeping, we take into
		 * account the time spent since it registered. */
		if (worker_registered_sleeping_start[workerid])
		{
			struct timespec sleeping_time;
			starpu_timespec_sub(&now, &sleeping_start_date[workerid], &sleeping_time);
			starpu_timespec_accumulate(&worker_info[workerid].sleeping_time, &sleeping_time);
		}

		if (worker_registered_executing_start[workerid])
		{
			struct timespec executing_time;
			starpu_timespec_sub(&now, &executing_start_date[workerid], &executing_time);
			starpu_timespec_accumulate(&worker_info[workerid].executing_time, &executing_time);
		}

		/* total_time = now - start_time */
		starpu_timespec_sub(&now, &worker_info[workerid].start_time,
					&worker_info[workerid].total_time);

		memcpy(info, &worker_info[workerid], sizeof(struct starpu_profiling_worker_info));
	}

	_starpu_worker_reset_profiling_info_with_lock(workerid);

	STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);

	return 0;
}
Пример #8
0
void _starpu_worker_update_profiling_info_executing(int workerid, struct timespec *executing_time, int executed_tasks, uint64_t used_cycles, uint64_t stall_cycles, double power_consumed)
{
	if (starpu_profiling_status_get())
	{
		STARPU_PTHREAD_MUTEX_LOCK(&worker_info_mutex[workerid]);

		if (executing_time)
			starpu_timespec_accumulate(&worker_info[workerid].executing_time, executing_time);

		worker_info[workerid].used_cycles += used_cycles;
		worker_info[workerid].stall_cycles += stall_cycles;
		worker_info[workerid].power_consumed += power_consumed;
		worker_info[workerid].executed_tasks += executed_tasks;

		STARPU_PTHREAD_MUTEX_UNLOCK(&worker_info_mutex[workerid]);
	}
	else /* Not thread safe, shouldn't be too much a problem */
		worker_info[workerid].executed_tasks += executed_tasks;
}
Пример #9
0
/* Workers may block when there is no work to do at all. We assume that the
 * mutex is hold when that function is called. */
void _starpu_block_worker(int workerid, pthread_cond_t *cond, pthread_mutex_t *mutex)
{
   struct timespec start_time, end_time;

   STARPU_TRACE_WORKER_SLEEP_START
      _starpu_worker_set_status(workerid, STATUS_SLEEPING);

   starpu_clock_gettime(&start_time);
   _starpu_worker_register_sleeping_start_date(workerid, &start_time);

   PTHREAD_COND_WAIT(cond, mutex);

   _starpu_worker_set_status(workerid, STATUS_UNKNOWN);
   STARPU_TRACE_WORKER_SLEEP_END
      starpu_clock_gettime(&end_time);

   int profiling = starpu_profiling_status_get();
   if (profiling)
   {
      struct timespec sleeping_time;
      starpu_timespec_sub(&end_time, &start_time, &sleeping_time);
      _starpu_worker_update_profiling_info_sleeping(workerid, &start_time, &end_time);
   }
}
struct starpu_task *_starpu_pop_task(struct _starpu_worker *worker)
{
	struct starpu_task *task;
	int worker_id;
	unsigned node;

	/* We can't tell in advance which task will be picked up, so we measure
	 * a timestamp, and will attribute it afterwards to the task. */
	int profiling = starpu_profiling_status_get();
	struct timespec pop_start_time;
	if (profiling)
		_starpu_clock_gettime(&pop_start_time);

pick:
	/* perhaps there is some local task to be executed first */
	task = _starpu_pop_local_task(worker);


	/* get tasks from the stacks of the strategy */
	if(!task)
	{
		struct _starpu_sched_ctx *sched_ctx ;
#ifndef STARPU_NON_BLOCKING_DRIVERS
		int been_here[STARPU_NMAX_SCHED_CTXS];
		int i;
		for(i = 0; i < STARPU_NMAX_SCHED_CTXS; i++)
			been_here[i] = 0;

		while(!task)
#endif
		{
			if(worker->nsched_ctxs == 1)
				sched_ctx = _starpu_get_initial_sched_ctx();
			else
			{
				while(1)
				{
					sched_ctx = _get_next_sched_ctx_to_pop_into(worker);

					if(worker->removed_from_ctx[sched_ctx->id] == 1 && worker->shares_tasks_lists[sched_ctx->id] == 1)
					{
						_starpu_worker_gets_out_of_ctx(sched_ctx->id, worker);
						worker->removed_from_ctx[sched_ctx->id] = 0;
						sched_ctx = NULL;
					}
					else
						break;
				}
			}

			if(sched_ctx && sched_ctx->id != STARPU_NMAX_SCHED_CTXS)
			{
				if (sched_ctx->sched_policy && sched_ctx->sched_policy->pop_task)
				{
					task = sched_ctx->sched_policy->pop_task(sched_ctx->id);
					_starpu_pop_task_end(task);
				}
			}
			
			if(!task)
			{
				/* it doesn't matter if it shares tasks list or not in the scheduler,
				   if it does not have any task to pop just get it out of here */
				/* however if it shares a task list it will be removed as soon as he 
				  finishes this job (in handle_job_termination) */
				if(worker->removed_from_ctx[sched_ctx->id])
				{
					_starpu_worker_gets_out_of_ctx(sched_ctx->id, worker);
					worker->removed_from_ctx[sched_ctx->id] = 0;
				}
#ifdef STARPU_USE_SC_HYPERVISOR
				if(worker->pop_ctx_priority)
				{
					struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;
					if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_idle_cycle && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id))
					{
//					_STARPU_TRACE_HYPERVISOR_BEGIN();
						perf_counters->notify_idle_cycle(sched_ctx->id, worker->workerid, 1.0);
//					_STARPU_TRACE_HYPERVISOR_END();
					}
				}
#endif //STARPU_USE_SC_HYPERVISOR
				
#ifndef STARPU_NON_BLOCKING_DRIVERS
				if(been_here[sched_ctx->id] || worker->nsched_ctxs == 1)
					break;

				been_here[sched_ctx->id] = 1;

#endif
			}
		}
	  }


	if (!task)
	{
		idle_start[worker->workerid] = starpu_timing_now();
		return NULL;
	}

	if(idle_start[worker->workerid] != 0.0)
	{
		double idle_end = starpu_timing_now();
		idle[worker->workerid] += (idle_end - idle_start[worker->workerid]);
		idle_start[worker->workerid] = 0.0;
	}
	

#ifdef STARPU_USE_SC_HYPERVISOR
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
	struct starpu_sched_ctx_performance_counters *perf_counters = sched_ctx->perf_counters;

	if(sched_ctx->id != 0 && perf_counters != NULL && perf_counters->notify_poped_task && _starpu_sched_ctx_allow_hypervisor(sched_ctx->id))
	{
//		_STARPU_TRACE_HYPERVISOR_BEGIN();
		perf_counters->notify_poped_task(task->sched_ctx, worker->workerid);
//		_STARPU_TRACE_HYPERVISOR_END();
	}
#endif //STARPU_USE_SC_HYPERVISOR


	/* Make sure we do not bother with all the multiformat-specific code if
	 * it is not necessary. */
	if (!_starpu_task_uses_multiformat_handles(task))
		goto profiling;


	/* This is either a conversion task, or a regular task for which the
	 * conversion tasks have already been created and submitted */
	if (task->mf_skip)
		goto profiling;

	/*
	 * This worker may not be able to execute this task. In this case, we
	 * should return the task anyway. It will be pushed back almost immediatly.
	 * This way, we avoid computing and executing the conversions tasks.
	 * Here, we do not care about what implementation is used.
	 */
	worker_id = starpu_worker_get_id();
	if (!starpu_worker_can_execute_task_first_impl(worker_id, task, NULL))
		return task;

	node = starpu_worker_get_memory_node(worker_id);

	/*
	 * We do have a task that uses multiformat handles. Let's create the
	 * required conversion tasks.
	 */
	STARPU_PTHREAD_MUTEX_UNLOCK(&worker->sched_mutex);
	unsigned i;
	unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
	for (i = 0; i < nbuffers; i++)
	{
		struct starpu_task *conversion_task;
		starpu_data_handle_t handle;

		handle = STARPU_TASK_GET_HANDLE(task, i);
		if (!_starpu_handle_needs_conversion_task(handle, node))
			continue;
		conversion_task = _starpu_create_conversion_task(handle, node);
		conversion_task->mf_skip = 1;
		conversion_task->execute_on_a_specific_worker = 1;
		conversion_task->workerid = worker_id;
		/*
		 * Next tasks will need to know where these handles have gone.
		 */
		handle->mf_node = node;
		_starpu_task_submit_conversion_task(conversion_task, worker_id);
	}

	task->mf_skip = 1;
	starpu_task_list_push_back(&worker->local_tasks, task);
	STARPU_PTHREAD_MUTEX_LOCK(&worker->sched_mutex);
	goto pick;

profiling:
	if (profiling)
	{
		struct starpu_profiling_task_info *profiling_info;
		profiling_info = task->profiling_info;

		/* The task may have been created before profiling was enabled,
		 * so we check if the profiling_info structure is available
		 * even though we already tested if profiling is enabled. */
		if (profiling_info)
		{
			memcpy(&profiling_info->pop_start_time,
				&pop_start_time, sizeof(struct timespec));
			_starpu_clock_gettime(&profiling_info->pop_end_time);
		}
	}

	if(task->prologue_callback_pop_func)
		task->prologue_callback_pop_func(task->prologue_callback_pop_arg);

	return task;
}