コード例 #1
0
/* Create combined workers by walking the hwloc topology.
 *
 * workerids / nworkers describe the workers to consider. For every worker
 * whose first perf_arch device is a single-core CPU, all hwloc ancestors of
 * the PU it is bound to are tagged by setting their userdata field to
 * (void*) -1. The recursive search is then started from the topology root.
 *
 * The size bounds are read from STARPU_MIN_WORKERSIZE (raised to 2 when
 * smaller) and STARPU_MAX_WORKERSIZE (-1 is turned into INT_MAX); the
 * synthesis arity is read from STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER
 * (-1 is turned into 2).
 */
static void find_and_assign_combinations_with_hwloc(int *workerids, int nworkers)
{
	struct _starpu_machine_config *machine = _starpu_get_machine_config();
	struct _starpu_machine_topology *topo = &machine->topology;
	int w;

	int arity = starpu_get_env_number("STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER");
	int size_min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	int size_max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");

	/* Apply the defaults / lower bound described above */
	arity = (arity == -1) ? 2 : arity;
	size_min = (size_min < 2) ? 2 : size_min;
	size_max = (size_max == -1) ? INT_MAX : size_max;

	/* Tag every topology node that sits above a single-core CPU worker */
	for (w = 0; w < nworkers; w++)
	{
		struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[w]);
		hwloc_obj_t node;

		if (worker->perf_arch.devices[0].type != STARPU_CPU_WORKER || worker->perf_arch.devices[0].ncores != 1)
			continue;

		/* Start from the PU the worker is bound to and climb up to the root */
		node = hwloc_get_obj_by_depth(topo->hwtopology, machine->pu_depth, worker->bindid);
		for (node = node->parent; node; node = node->parent)
			node->userdata = (void*) -1;
	}

	find_and_assign_combinations(hwloc_get_root_obj(topo->hwtopology), size_min, size_max, arity);
}
コード例 #2
0
/* Recursively create combined workers for the topology subtree rooted at obj.
 *
 * obj:              hwloc object to examine.
 * min, max:         inclusive bounds on the number of workers a combined
 *                   worker created here may contain.
 * synthesize_arity: forwarded to synthesize_intermediate_workers().
 *
 * A child whose userdata is non-NULL is treated as containing CPU workers;
 * children whose userdata is (void*) -1 are the ones recursed into at the end.
 * When the workers found under obj fit within [min, max], a combined worker
 * is registered and added to the worker collection of the current scheduling
 * context (context 0 when starpu_sched_ctx_get_context() returns
 * STARPU_NMAX_SCHED_CTXS).
 */
static void find_and_assign_combinations(hwloc_obj_t obj, unsigned min, unsigned max, unsigned synthesize_arity)
{
	char name[64];
	unsigned i, n, nworkers;
	/* Index of the last child seen with CPU workers; only meaningful when n == 1 */
	unsigned only_child = 0;
	int cpu_workers[STARPU_NMAXWORKERS];

	struct _starpu_machine_config *config = _starpu_get_machine_config();
	struct _starpu_machine_topology *topology = &config->topology;

	hwloc_obj_snprintf(name, sizeof(name), topology->hwtopology, obj, "#", 0);
	_STARPU_DEBUG("Looking at %s\n", name);

	for (n = 0, i = 0; i < obj->arity; i++)
		if (obj->children[i]->userdata)
		{
			/* it has a CPU worker */
			only_child = i;
			n++;
		}

	if (n == 1)
	{
		/* Only one child holds CPU workers: go to the next level right
		 * away. Descend into that very child, which is not necessarily
		 * children[0]; otherwise the only populated subtree would be
		 * skipped. */
		find_and_assign_combinations(obj->children[only_child], min, max, synthesize_arity);
		return;
	}

	/* Add this object */
	nworkers = 0;
	find_workers(obj, cpu_workers, &nworkers);

	if (nworkers >= min && nworkers <= max)
	{
		_STARPU_DEBUG("Adding it\n");
		unsigned sched_ctx_id  = starpu_sched_ctx_get_context();
		if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
			sched_ctx_id = 0;

		struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);

		int newworkerid = starpu_combined_worker_assign_workerid(nworkers, cpu_workers);
		STARPU_ASSERT(newworkerid >= 0);
		workers->add(workers,newworkerid);
	}

	/* Add artificial intermediate objects recursively */
	synthesize_intermediate_workers(obj->children, min, max, obj->arity, n, synthesize_arity);

	/* And recurse */
	for (i = 0; i < obj->arity; i++)
		if (obj->children[i]->userdata == (void*) -1)
			find_and_assign_combinations(obj->children[i], min, max, synthesize_arity);
}
コード例 #3
0
/* Entry point of combined-worker detection for the given set of workers.
 *
 * When the configuration's single_combined_worker field is strictly positive,
 * combine_all_cpu_workers() is used. Otherwise the topology-driven search is
 * run: the hwloc variant when STARPU_HAVE_HWLOC is defined, the fallback
 * variant when it is not.
 */
void _starpu_sched_find_worker_combinations(int *workerids, int nworkers)
{
	struct _starpu_machine_config *machine = _starpu_get_machine_config();

	if (machine->conf->single_combined_worker > 0)
	{
		combine_all_cpu_workers(workerids, nworkers);
		return;
	}

#ifdef STARPU_HAVE_HWLOC
	find_and_assign_combinations_with_hwloc(workerids, nworkers);
#else
	find_and_assign_combinations_without_hwloc(workerids, nworkers);
#endif
}
コード例 #4
0
/* Return the memory node recorded for workerid.
 *
 * Ids below topology.nworkers index config->workers; ids from there up to
 * nworkers + ncombinedworkers (exclusive) index config->combined_workers.
 * Any other id trips the assertion.
 */
unsigned starpu_worker_get_memory_node(unsigned workerid)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	unsigned nworkers = config->topology.nworkers;

	if (workerid >= nworkers)
	{
		/* Not a basic worker: it has to be a combined worker */
		unsigned ncombinedworkers = config->topology.ncombinedworkers;
		STARPU_ASSERT_MSG(workerid < ncombinedworkers + nworkers, "Bad workerid %u, maximum %u", workerid, ncombinedworkers + nworkers);
		return config->combined_workers[workerid - nworkers].memory_node;
	}

	/* Basic worker */
	return config->workers[workerid].memory_node;
}
コード例 #5
0
/* Create combined workers without topology information.
 *
 * workerids / nworkers describe the workers to consider. The ids of the CPU
 * workers are gathered in one array and, when STARPU_USE_MIC is defined, the
 * ids of the MIC workers are gathered in one array per MIC device. Each group
 * is then handed to assign_combinations_without_hwloc() together with the
 * worker collection of the current scheduling context (context 0 when
 * starpu_sched_ctx_get_context() returns STARPU_NMAX_SCHED_CTXS).
 *
 * Size bounds are read from STARPU_MIN_WORKERSIZE (raised to 2 when smaller)
 * and STARPU_MAX_WORKERSIZE (-1 or anything larger than the group is clamped
 * to the group size).
 */
static void find_and_assign_combinations_without_hwloc(int *workerids, int nworkers)
{
	int i;
	unsigned sched_ctx_id  = starpu_sched_ctx_get_context();
	if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
		sched_ctx_id = 0;
	int min, max;
#ifdef STARPU_USE_MIC
	unsigned j;
	int mic_min, mic_max;
#endif

	struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);

	/* We put the id of all CPU workers in this array */
	int cpu_workers[STARPU_NMAXWORKERS];
	unsigned ncpus = 0;
#ifdef STARPU_USE_MIC
	/* Per-MIC-device bookkeeping:
	 *   mic_id[j]       device id owning slot j, or -1 while the slot is free
	 *   nmics_table[j]  number of workers recorded in slot j
	 *   mic_workers[j]  ids of the workers recorded in slot j */
	unsigned nb_mics = _starpu_get_machine_config()->topology.nmicdevices;
	unsigned * nmics_table;
	int * mic_id;
	int ** mic_workers;
	mic_id = malloc(sizeof(int)*nb_mics);
	nmics_table = malloc(sizeof(unsigned)*nb_mics);
	mic_workers = malloc(sizeof(int*)*nb_mics);
	for(j=0; j<nb_mics; j++)
	{
		mic_id[j] = -1;
		nmics_table[j] = 0;
		mic_workers[j] = malloc(sizeof(int)*STARPU_NMAXWORKERS);
	}
#endif /* STARPU_USE_MIC */

	struct _starpu_worker *worker;
	for (i = 0; i < nworkers; i++)
	{
		worker = _starpu_get_worker_struct(workerids[i]);
		if (worker->arch == STARPU_CPU_WORKER)
			/* Record the worker id itself, not its position in workerids[] */
			cpu_workers[ncpus++] = workerids[i];
#ifdef STARPU_USE_MIC
		else if(worker->arch == STARPU_MIC_WORKER)
		{
			/* Look for the slot of this device, or the first free slot.
			 * The bound is tested first so that mic_id[] is never read
			 * past its end when all slots belong to other devices. */
			for(j=0; j<nb_mics && mic_id[j] != worker->devid && mic_id[j] != -1; j++);
			if(j<nb_mics)
			{
				if(mic_id[j] == -1)
				{
					mic_id[j] = worker->devid;
				}
				mic_workers[j][nmics_table[j]++] = workerids[i];
			}
		}
#endif /* STARPU_USE_MIC */

	}


	min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	if (min < 2)
		min = 2;
	max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
	if (max == -1 || max > (int) ncpus)
		max = ncpus;

	assign_combinations_without_hwloc(workers,cpu_workers,ncpus,min,max);
#ifdef STARPU_USE_MIC
	mic_min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	if (mic_min < 2)
		mic_min = 2;
	for(j=0; j<nb_mics; j++)
	{
		/* The upper bound is clamped separately for each device */
		mic_max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
		if (mic_max == -1 || mic_max > (int) nmics_table[j])
			mic_max = nmics_table[j];
		assign_combinations_without_hwloc(workers,mic_workers[j],nmics_table[j],mic_min,mic_max);
		free(mic_workers[j]);
	}
	free(mic_id);
	free(nmics_table);
	free(mic_workers);
#endif /* STARPU_USE_MIC */
}
コード例 #6
0
/* Push a task towards the workers of its scheduling context.
 *
 * Returns -EAGAIN when the context's is_initial_sched flag is not set and
 * _starpu_nworkers_able_to_execute_task() reports no worker; in that case the
 * task has been appended to the context's empty_ctx_tasks list. Otherwise
 * returns the value produced by the push path that was taken (specific
 * worker, context without scheduling policy, or the policy's push_task
 * method). When a path in the non-specific-worker branch yields -1, the
 * function calls itself again with the same task.
 */
int _starpu_push_task_to_workers(struct starpu_task *task)
{
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
	unsigned nworkers = 0;

	_STARPU_TRACE_JOB_PUSH(task, task->priority > 0);

	/* if the contexts still does not have workers put the task back to its place in
	   the empty ctx list */
	if(!sched_ctx->is_initial_sched)
	{
		/*if there are workers in the ctx that are not able to execute tasks
		  we consider the ctx empty */
		nworkers = _starpu_nworkers_able_to_execute_task(task, sched_ctx);

		if (nworkers == 0)
		{
			/* Park the task on the context's list of pending tasks, under its mutex */
			STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->empty_ctx_mutex);
			starpu_task_list_push_back(&sched_ctx->empty_ctx_tasks, task);
			STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->empty_ctx_mutex);
#ifdef STARPU_USE_SC_HYPERVISOR
			/* NOTE(review): sched_ctx has already been dereferenced above, so
			 * the NULL test below cannot be what protects this path. */
			if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->perf_counters != NULL 
			   && sched_ctx->perf_counters->notify_empty_ctx)
			{
				_STARPU_TRACE_HYPERVISOR_BEGIN();
				sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task);
				_STARPU_TRACE_HYPERVISOR_END();
			}
#endif

			return -EAGAIN;
		}
	}

	_starpu_profiling_set_task_push_start_time(task);

	int ret = 0;
	if (STARPU_UNLIKELY(task->execute_on_a_specific_worker))
	{
		/* The target worker is imposed: optionally prefetch the inputs on
		 * its memory node, then push directly to it. */
		unsigned node = starpu_worker_get_memory_node(task->workerid);
		if (starpu_get_prefetch_flag())
			starpu_prefetch_task_input_on_node(task, node);

		ret = _starpu_push_task_on_specific_worker(task, task->workerid);
	}
	else
	{
		struct _starpu_machine_config *config = _starpu_get_machine_config();

		/* When a task can only be executed on a given arch and we have
		 * only one memory node for that arch, we can systematically
		 * prefetch before the scheduling decision. */
		if (starpu_get_prefetch_flag())
		{
			if (task->cl->where == STARPU_CPU && config->cpus_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->cpus_nodeid);
			else if (task->cl->where == STARPU_CUDA && config->cuda_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->cuda_nodeid);
			else if (task->cl->where == STARPU_OPENCL && config->opencl_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->opencl_nodeid);
			else if (task->cl->where == STARPU_MIC && config->mic_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->mic_nodeid);
			else if (task->cl->where == STARPU_SCC && config->scc_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->scc_nodeid);
		}

		if(!sched_ctx->sched_policy)
		{
			/* The context has no scheduling policy */

			/* Note: we have to call that early, or else the task may have
			 * disappeared already */
			starpu_push_task_end(task);
			if(!sched_ctx->awake_workers)
				ret = _starpu_push_task_on_specific_worker(task, sched_ctx->main_master);
			else
			{
				/* Hand one alias of the task to every worker of the
				 * context; the job's barriers and counters are sized
				 * with the number of workers in the collection. */
				struct starpu_worker_collection *workers = sched_ctx->workers;
				
				struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
				job->task_size = workers->nworkers;
				job->combined_workerid = -1; // workerid; its a ctx not combined worker
				job->active_task_alias_count = 0;

				STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, workers->nworkers);
				STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, workers->nworkers);
				job->after_work_busy_barrier = workers->nworkers;

				unsigned workerid;
				struct starpu_sched_ctx_iterator it;
				if(workers->init_iterator)
					workers->init_iterator(workers, &it);

				while(workers->has_next(workers, &it))
				{
					workerid = workers->get_next(workers, &it);
					struct starpu_task *alias = starpu_task_dup(task);
					alias->destroy = 1;
					/* Results of the individual pushes are OR-ed together */
					ret |= _starpu_push_task_on_specific_worker(alias, workerid);
				}
			}
		}
		else
		{
			STARPU_ASSERT(sched_ctx->sched_policy->push_task);
			/* check out if there are any workers in the context */
			starpu_pthread_rwlock_t *changing_ctx_mutex = _starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx->id);
			/* The read lock is held across both the worker count check and
			 * the policy's push_task call */
			STARPU_PTHREAD_RWLOCK_RDLOCK(changing_ctx_mutex);
			nworkers = starpu_sched_ctx_get_nworkers(sched_ctx->id);
			if (nworkers == 0)
				ret = -1;
			else
			{
				_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
				ret = sched_ctx->sched_policy->push_task(task);
				_STARPU_TRACE_WORKER_SCHEDULING_POP;
			}
			STARPU_PTHREAD_RWLOCK_UNLOCK(changing_ctx_mutex);
		}

		if(ret == -1)
		{
			/* The push did not succeed: start over from the top for the
			 * same task.
			 * NOTE(review): this retry is a plain recursive call with no
			 * visible bound on its depth, and it prints to stderr
			 * unconditionally; confirm both are intended. */
			fprintf(stderr, "repush task \n");
			_STARPU_TRACE_JOB_POP(task, task->priority > 0);
			ret = _starpu_push_task_to_workers(task);
		}
	}
	/* Note: from here, the task might have been destroyed already! */
	_STARPU_LOG_OUT();
	return ret;

}