Example #1
/* the generic interface that calls the proper underlying implementation */
int _starpu_push_task(starpu_job_t j, unsigned job_is_already_locked)
{
	struct starpu_task *task = j->task;

	task->status = STARPU_TASK_READY;

	/* In case there is no codelet associated with the task (i.e. a control
	 * task), we directly execute its callback and enforce the
	 * corresponding dependencies. */
	if (task->cl == NULL)
	{
		_starpu_handle_job_termination(j, job_is_already_locked);
		return 0;
	}

	if (STARPU_UNLIKELY(task->execute_on_a_specific_worker))
	{
		unsigned workerid = task->workerid;
		struct starpu_worker_s *worker = _starpu_get_worker_struct(workerid);
		
		if (use_prefetch)
		{
			uint32_t memory_node = starpu_worker_get_memory_node(workerid); 
			_starpu_prefetch_task_input_on_node(task, memory_node);
		}

		return _starpu_push_local_task(worker, j);
	}
	else
	{
		STARPU_ASSERT(policy.push_task);

		return policy.push_task(task);
	}
}
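
The cl == NULL branch above is the "control task" case: a task submitted with no codelet, used only to hang dependencies and a callback on. The following sketch is not part of the StarPU sources shown here; notify_done and submit_control_task are hypothetical names introduced purely to illustrate how application code could submit such a control task.

#include <starpu.h>
#include <stdio.h>

/* Hypothetical callback fired once both predecessor tasks have completed. */
static void notify_done(void *arg)
{
	(void) arg;
	fprintf(stderr, "both predecessor tasks finished\n");
}

/* Submit a codelet-less control task that only enforces dependencies on
 * the two given tasks and then runs its callback. */
static void submit_control_task(struct starpu_task *a, struct starpu_task *b)
{
	struct starpu_task *deps[2] = {a, b};
	struct starpu_task *sync = starpu_task_create();

	sync->cl = NULL;                   /* no codelet: control task */
	sync->callback_func = notify_done;
	sync->callback_arg = NULL;

	starpu_task_declare_deps_array(sync, 2, deps);

	int ret = starpu_task_submit(sync);
	STARPU_ASSERT(ret == 0);
}
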
/* Use the hwloc topology to build combined workers: mark the topology nodes
 * that contain single-core CPU workers, then recursively group them according
 * to STARPU_MIN_WORKERSIZE, STARPU_MAX_WORKERSIZE and
 * STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER. */
static void find_and_assign_combinations_with_hwloc(int *workerids, int nworkers)
{
	struct _starpu_machine_config *config = _starpu_get_machine_config();
	struct _starpu_machine_topology *topology = &config->topology;
	int synthesize_arity = starpu_get_env_number("STARPU_SYNTHESIZE_ARITY_COMBINED_WORKER");

	int min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	if (min < 2)
		min = 2;
	int max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
	if (max == -1)
		max = INT_MAX;

	if (synthesize_arity == -1)
		synthesize_arity = 2;

	/* First, mark nodes which contain CPU workers, simply by setting their userdata field */
	int i;
	for (i = 0; i < nworkers; i++)
	{
		struct _starpu_worker *worker = _starpu_get_worker_struct(workerids[i]);
		if (worker->perf_arch.devices[0].type == STARPU_CPU_WORKER && worker->perf_arch.devices[0].ncores == 1)
		{
			hwloc_obj_t obj = hwloc_get_obj_by_depth(topology->hwtopology, config->pu_depth, worker->bindid);
			obj = obj->parent;
			while (obj)
			{
				obj->userdata = (void*) -1;
				obj = obj->parent;
			}
		}
	}
	find_and_assign_combinations(hwloc_get_root_obj(topology->hwtopology), min, max, synthesize_arity);
}
/* Build one combined worker per size between STARPU_MIN_WORKERSIZE and
 * min(STARPU_MAX_WORKERSIZE, number of CPU workers), each grouping the first
 * "i" CPU workers, and add it to the current scheduling context. */
static void combine_all_cpu_workers(int *workerids, int nworkers)
{
	unsigned sched_ctx_id  = starpu_sched_ctx_get_context();
	if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
		sched_ctx_id = 0;
	struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
	int cpu_workers[STARPU_NMAXWORKERS];
	int ncpus = 0;
	struct _starpu_worker *worker;
	int i;
	int min;
	int max;

	for (i = 0; i < nworkers; i++)
	{
		worker = _starpu_get_worker_struct(workerids[i]);

		if (worker->arch == STARPU_CPU_WORKER)
			cpu_workers[ncpus++] = workerids[i];
	}

	min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	if (min < 1)
		min = 1;
	max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
	if (max == -1 || max > ncpus)
		max = ncpus;

	for (i = min; i <= max; i++)
	{
		int newworkerid;
		newworkerid = starpu_combined_worker_assign_workerid(i, cpu_workers);
		STARPU_ASSERT(newworkerid >= 0);
		workers->add(workers, newworkerid);
	}
}
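
combine_all_cpu_workers() and the hwloc variant above register the groups they build through starpu_combined_worker_assign_workerid(), and combined workers are numbered after the basic ones (the "workerid < nbasic_workers" test further below relies on this). As a rough sketch, and assuming the public starpu_combined_worker_get_count() and starpu_combined_worker_get_description() helpers, the resulting groups could be inspected as follows; list_combined_workers is a name made up for this example.

#include <starpu.h>
#include <stdio.h>

/* Print the combined workers that were built, together with their size.
 * Combined worker ids start right after the basic worker ids. */
static void list_combined_workers(void)
{
	unsigned nbasic = starpu_worker_get_count();
	unsigned ncombined = starpu_combined_worker_get_count();
	unsigned i;

	for (i = 0; i < ncombined; i++)
	{
		int workerid = (int) (nbasic + i);
		int worker_size;
		int *members;

		starpu_combined_worker_get_description(workerid, &worker_size, &members);
		printf("combined worker %d groups %d basic workers\n", workerid, worker_size);
	}
}
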
/* Fallback used when hwloc is not available: collect the CPU workers (and,
 * when MIC support is enabled, the workers of each MIC device) and build
 * combined workers of every size between STARPU_MIN_WORKERSIZE and
 * STARPU_MAX_WORKERSIZE. */
static void find_and_assign_combinations_without_hwloc(int *workerids, int nworkers)
{
	int i;
	unsigned sched_ctx_id  = starpu_sched_ctx_get_context();
	if(sched_ctx_id == STARPU_NMAX_SCHED_CTXS)
		sched_ctx_id = 0;
	int min, max;
#ifdef STARPU_USE_MIC
	unsigned j;
	int mic_min, mic_max;
#endif

	struct starpu_worker_collection* workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);

	/* We put the id of all CPU workers in this array */
	int cpu_workers[STARPU_NMAXWORKERS];
	unsigned ncpus = 0;
#ifdef STARPU_USE_MIC
	unsigned nb_mics = _starpu_get_machine_config()->topology.nmicdevices;
	unsigned * nmics_table;
	int * mic_id;
	int ** mic_workers;
	mic_id = malloc(sizeof(int)*nb_mics);
	nmics_table = malloc(sizeof(unsigned)*nb_mics);
	mic_workers = malloc(sizeof(int*)*nb_mics);
	for(j=0; j<nb_mics; j++)
	{
		mic_id[j] = -1;
		nmics_table[j] = 0;
		mic_workers[j] = malloc(sizeof(int)*STARPU_NMAXWORKERS);
	}
#endif /* STARPU_USE_MIC */

	struct _starpu_worker *worker;
	for (i = 0; i < nworkers; i++)
	{
		worker = _starpu_get_worker_struct(workerids[i]);
		if (worker->arch == STARPU_CPU_WORKER)
			cpu_workers[ncpus++] = i;
#ifdef STARPU_USE_MIC
		else if(worker->arch == STARPU_MIC_WORKER)
		{
			/* Find the table entry for this MIC device (or the first free slot). */
			for(j=0; j<nb_mics && mic_id[j] != worker->devid && mic_id[j] != -1; j++);
			if(j<nb_mics)
			{
				if(mic_id[j] == -1)
				{
					mic_id[j] = worker->devid;					
				}
				mic_workers[j][nmics_table[j]++] = i;
			}
		}
#endif /* STARPU_USE_MIC */

	}


	min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	if (min < 2)
		min = 2;
	max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
	if (max == -1 || max > (int) ncpus)
		max = ncpus;
	
	assign_combinations_without_hwloc(workers,cpu_workers,ncpus,min,max);
#ifdef STARPU_USE_MIC
	mic_min = starpu_get_env_number("STARPU_MIN_WORKERSIZE");
	if (mic_min < 2)
		mic_min = 2;
	for(j=0; j<nb_mics; j++)
	{
		mic_max = starpu_get_env_number("STARPU_MAX_WORKERSIZE");
		if (mic_max == -1 || mic_max > (int) nmics_table[j])
			mic_max = nmics_table[j];
		assign_combinations_without_hwloc(workers,mic_workers[j],nmics_table[j],mic_min,mic_max);
		free(mic_workers[j]);
	}
	free(mic_id);
	free(nmics_table);
	free(mic_workers);
#endif /* STARPU_USE_MIC */
}
/* Enqueue a task into the list of tasks explicitly attached to a worker. In
 * case workerid identifies a combined worker, the task is enqueued on each
 * worker of the combination. */
static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int workerid)
{
	int nbasic_workers = (int)starpu_worker_get_count();

	/* Is this a basic worker or a combined worker ? */
	int is_basic_worker = (workerid < nbasic_workers);

	unsigned memory_node;
	struct _starpu_worker *worker = NULL;
	struct _starpu_combined_worker *combined_worker = NULL;

	if (is_basic_worker)
	{
		worker = _starpu_get_worker_struct(workerid);
		memory_node = worker->memory_node;
	}
	else
	{
		combined_worker = _starpu_get_combined_worker_struct(workerid);
		memory_node = combined_worker->memory_node;
	}

	if (use_prefetch)
		starpu_prefetch_task_input_on_node(task, memory_node);

	if (is_basic_worker)
		_starpu_push_task_on_specific_worker_notify_sched(task, worker, workerid, workerid);
	else
	{
		/* Notify all workers of the combined worker */
		int worker_size = combined_worker->worker_size;
		int *combined_workerid = combined_worker->combined_workerid;

		int j;
		for (j = 0; j < worker_size; j++)
		{
			int subworkerid = combined_workerid[j];
			_starpu_push_task_on_specific_worker_notify_sched(task, _starpu_get_worker_struct(subworkerid), subworkerid, workerid);
		}
	}

#ifdef STARPU_USE_SC_HYPERVISOR
	starpu_sched_ctx_call_pushed_task_cb(workerid, task->sched_ctx);
#endif //STARPU_USE_SC_HYPERVISOR
	unsigned i;
	if (is_basic_worker)
	{
		unsigned node = starpu_worker_get_memory_node(workerid);
		if (_starpu_task_uses_multiformat_handles(task))
		{
			unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
			for (i = 0; i < nbuffers; i++)
			{
				struct starpu_task *conversion_task;
				starpu_data_handle_t handle;

				handle = STARPU_TASK_GET_HANDLE(task, i);
				if (!_starpu_handle_needs_conversion_task(handle, node))
					continue;

				conversion_task = _starpu_create_conversion_task(handle, node);
				conversion_task->mf_skip = 1;
				conversion_task->execute_on_a_specific_worker = 1;
				conversion_task->workerid = workerid;
				_starpu_task_submit_conversion_task(conversion_task, workerid);
				//_STARPU_DEBUG("Pushing a conversion task\n");
			}

			for (i = 0; i < nbuffers; i++)
			{
				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
				handle->mf_node = node;
			}
		}
//		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)

		if(task->priority > 0)
			return _starpu_push_local_task(worker, task, 1);
		else
			return _starpu_push_local_task(worker, task, 0);
	}
	else
	{
		/* This is a combined worker so we create task aliases */
		int worker_size = combined_worker->worker_size;
		int *combined_workerid = combined_worker->combined_workerid;

		int ret = 0;

		struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
		job->task_size = worker_size;
		job->combined_workerid = workerid;
		job->active_task_alias_count = 0;

		STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, worker_size);
		STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, worker_size);
		job->after_work_busy_barrier = worker_size;

		/* Note: we have to call that early, or else the task may have
		 * disappeared already */
		starpu_push_task_end(task);

		int j;
		for (j = 0; j < worker_size; j++)
		{
			struct starpu_task *alias = starpu_task_dup(task);
			alias->destroy = 1;

			worker = _starpu_get_worker_struct(combined_workerid[j]);
			ret |= _starpu_push_local_task(worker, alias, 0);
		}

		return ret;
	}
}
/* The scheduling policy may put tasks directly into a worker's local queue so
 * that it is not always necessary to create its own queue when the local queue
 * is sufficient. If "prio" is non-zero, the task is put at the end of the queue
 * from which the worker pops tasks first. Setting "prio" to 0 therefore ensures
 * a FIFO ordering. */
int starpu_push_local_task(int workerid, struct starpu_task *task, int prio)
{
	struct _starpu_worker *worker = _starpu_get_worker_struct(workerid);

	return  _starpu_push_local_task(worker, task, prio);
}
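
To tie the pieces together, here is a minimal sketch (again, not part of the sources above) of how a scheduling policy's push_task hook could use starpu_push_local_task() to spread tasks round-robin over the basic workers; example_push_task and its static counter are hypothetical names for illustration only.

#include <starpu.h>

/* Hypothetical push_task hook: pick the next worker in round-robin order
 * and append the task to its local queue in FIFO order (prio == 0). */
static int example_push_task(struct starpu_task *task)
{
	static unsigned next_worker = 0;
	unsigned nworkers = starpu_worker_get_count();

	int workerid = (int) (next_worker++ % nworkers);

	return starpu_push_local_task(workerid, task, 0);
}
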