Example #1
/* The data must be released by calling starpu_data_release later on */
int starpu_data_acquire_cb(starpu_data_handle handle,
		starpu_access_mode mode, void (*callback)(void *), void *arg)
{
	STARPU_ASSERT(handle);

	struct user_interaction_wrapper *wrapper = malloc(sizeof(struct user_interaction_wrapper));
	STARPU_ASSERT(wrapper);

	wrapper->handle = handle;
	wrapper->mode = mode;
	wrapper->callback = callback;
	wrapper->callback_arg = arg;
	PTHREAD_COND_INIT(&wrapper->cond, NULL);
	PTHREAD_MUTEX_INIT(&wrapper->lock, NULL);
	wrapper->finished = 0;

//TODO: instead of having the is_prefetch argument, _starpu_fetch_data should consider two flags: async and detached
	_starpu_spin_lock(&handle->header_lock);
	handle->per_node[0].refcnt++;
	_starpu_spin_unlock(&handle->header_lock);

	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
	int sequential_consistency = handle->sequential_consistency;
	if (sequential_consistency)
	{
		wrapper->pre_sync_task = starpu_task_create();
		wrapper->pre_sync_task->callback_func = starpu_data_acquire_cb_pre_sync_callback;
		wrapper->pre_sync_task->callback_arg = wrapper;

		wrapper->post_sync_task = starpu_task_create();

#ifdef STARPU_USE_FXT
		starpu_job_t job = _starpu_get_job_associated_to_task(wrapper->pre_sync_task);
		job->model_name = "acquire_cb_pre";
		job = _starpu_get_job_associated_to_task(wrapper->post_sync_task);
		job->model_name = "acquire_cb_post";
#endif

		_starpu_detect_implicit_data_deps_with_handle(wrapper->pre_sync_task, wrapper->post_sync_task, handle, mode);
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);

		/* TODO detect if this is superfluous */
		int ret = starpu_task_submit(wrapper->pre_sync_task, NULL);
		STARPU_ASSERT(!ret);
	}
	else {
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);

		starpu_data_acquire_cb_pre_sync_callback(wrapper);
	}

	return 0;
}
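
A minimal usage sketch of the asynchronous interface above (the helper and callback names are illustrative, not part of the StarPU sources): the callback fires once every previously submitted task accessing the handle has completed, and it must hand the reference back with starpu_data_release.

static void my_acquire_callback(void *arg)
{
	starpu_data_handle handle = (starpu_data_handle) arg;

	/* The application may now access the data buffer directly. */
	/* ... read or modify the local copy here ... */

	/* Hand the reference back to StarPU. */
	starpu_data_release(handle);
}

void acquire_cb_example(starpu_data_handle handle)
{
	/* Returns immediately; the callback runs once the data is available. */
	int ret = starpu_data_acquire_cb(handle, STARPU_RW, my_acquire_callback, handle);
	STARPU_ASSERT(!ret);
}
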
static
int ws_push_task(struct starpu_task *task)
{
	unsigned sched_ctx_id = task->sched_ctx;
	struct _starpu_work_stealing_data *ws = (struct _starpu_work_stealing_data*)starpu_sched_ctx_get_policy_data(sched_ctx_id);

	struct _starpu_deque_jobq *deque_queue;
	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	int workerid = starpu_worker_get_id();

	unsigned worker = 0;
	struct starpu_worker_collection *workers = starpu_sched_ctx_get_worker_collection(sched_ctx_id);
	struct starpu_sched_ctx_iterator it;
	
	workers->init_iterator(workers, &it);
	/* !! It is silly to lock everything! */
	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		starpu_pthread_mutex_t *sched_mutex;
		starpu_pthread_cond_t *sched_cond;
		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
		STARPU_PTHREAD_MUTEX_LOCK(sched_mutex);
	}
	
	
	/* If the current thread is not a worker but
	 * the main thread (-1), we find the best worker to
	 * put the task on its queue */
	if (workerid == -1)
		workerid = select_worker(sched_ctx_id);

	deque_queue = ws->queue_array[workerid];

#ifdef HAVE_AYUDAME_H
	if (AYU_event)
	{
		intptr_t id = workerid;
		AYU_event(AYU_ADDTASKTOQUEUE, j->job_id, &id);
	}
#endif
	_starpu_job_list_push_back(&deque_queue->jobq, j);
	deque_queue->njobs++;
	starpu_push_task_end(task);

	/* The first pass exhausted the iterator while taking the locks, so
	 * restart it before releasing them. */
	workers->init_iterator(workers, &it);
	while(workers->has_next(workers, &it))
	{
		worker = workers->get_next(workers, &it);
		starpu_pthread_mutex_t *sched_mutex;
		starpu_pthread_cond_t *sched_cond;
		starpu_worker_get_sched_condition(worker, &sched_mutex, &sched_cond);
#ifndef STARPU_NON_BLOCKING_DRIVERS
		STARPU_PTHREAD_COND_SIGNAL(sched_cond);
#endif
		STARPU_PTHREAD_MUTEX_UNLOCK(sched_mutex);
	}
		
	return 0;
}
int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle,
		     int asynchronous, void (*callback_func)(void*), void *callback_arg,
		     int reduction, struct starpu_task *reduction_dep_task)
{

	struct starpu_task *task = starpu_task_create();
	STARPU_ASSERT(task);
	task->name = "data_cpy";

	struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
	if (reduction)
	{
		j->reduction_task = reduction;
		if (reduction_dep_task)
			starpu_task_declare_deps_array(task, 1, &reduction_dep_task);
	}

	task->cl = &copy_cl;

	unsigned *interface_id = malloc(sizeof(*interface_id));
	*interface_id = dst_handle->ops->interfaceid; 
	task->cl_arg = interface_id;
	task->cl_arg_size = sizeof(*interface_id);
	task->cl_arg_free = 1;

	task->callback_func = callback_func;
	task->callback_arg = callback_arg;

	STARPU_TASK_SET_HANDLE(task, dst_handle, 0);
	STARPU_TASK_SET_HANDLE(task, src_handle, 1);

	task->synchronous = !asynchronous;

	int ret = _starpu_task_submit_internally(task);
	STARPU_ASSERT(!ret);

	return 0;
}
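
A hedged caller sketch for the helper above, assuming the public starpu_data_cpy(dst, src, asynchronous, callback_func, callback_arg) wrapper forwards here with reduction disabled; the handle names and the callback are illustrative.

static void copy_done(void *arg)
{
	/* The destination handle now holds a copy of the source data. */
	(void) arg;
}

void copy_example(starpu_data_handle_t dst, starpu_data_handle_t src)
{
	/* Asynchronous copy: the call returns immediately and copy_done is
	 * invoked once the "data_cpy" task has executed. */
	int ret = starpu_data_cpy(dst, src, 1, copy_done, dst);
	STARPU_ASSERT(!ret);
}
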
Example #4
/* NB: handle->sequential_consistency_mutex must be held by the caller */
void _starpu_detect_implicit_data_deps_with_handle(struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task,
						starpu_data_handle handle, starpu_access_mode mode)
{
	STARPU_ASSERT(!(mode & STARPU_SCRATCH));

	if (handle->sequential_consistency)
	{
#ifdef STARPU_USE_FXT
		/* In case we are generating the DAG, we add an implicit
		 * dependency between the pre and the post sync tasks in case
		 * they are not the same. */
		if (pre_sync_task != post_sync_task)
		{
			starpu_job_t pre_sync_job = _starpu_get_job_associated_to_task(pre_sync_task);
			starpu_job_t post_sync_job = _starpu_get_job_associated_to_task(post_sync_task);
			STARPU_TRACE_GHOST_TASK_DEPS(pre_sync_job->job_id, post_sync_job->job_id);
		}
#endif

		starpu_access_mode previous_mode = handle->last_submitted_mode;
	
		if (mode & STARPU_W)
		{
			_STARPU_DEP_DEBUG("W %p\n", handle);
			if (previous_mode & STARPU_W)
			{
				_STARPU_DEP_DEBUG("WAW %p\n", handle);
				/* (Read) Write */
				/* This task depends on the previous writer */
				if (handle->last_submitted_writer)
				{
					starpu_job_t job = _starpu_get_job_associated_to_task(handle->last_submitted_writer);
					starpu_task_declare_deps_array(pre_sync_task, 1, &job->event);
				}

#ifdef STARPU_USE_FXT
				/* If there is a ghost writer instead, we
				 * should declare a ghost dependency here, and
				 * invalidate the ghost value. */
				if (handle->last_submitted_ghost_writer_id_is_valid)
				{
					starpu_job_t post_sync_job = _starpu_get_job_associated_to_task(post_sync_task);
					STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_writer_id, post_sync_job->job_id);
					handle->last_submitted_ghost_writer_id_is_valid = 0;
				}
#endif
	
				handle->last_submitted_writer = post_sync_task;
			}
			else {
				/* The tasks submitted previously were in read-only
				 * mode: this task must depend on all those read-only
				 * tasks, and we get rid of the list of readers */
			
				_STARPU_DEP_DEBUG("WAR %p\n", handle);
				/* Count the readers */
				unsigned nreaders = 0;
				struct starpu_task_wrapper_list *l;
				l = handle->last_submitted_readers;
				while (l)
				{
					nreaders++;
					l = l->next;
				}
				_STARPU_DEP_DEBUG("%u readers\n", nreaders);

				starpu_event events[nreaders];

				unsigned i = 0;
				l = handle->last_submitted_readers;
				while (l)
				{
					STARPU_ASSERT(l->task);
					starpu_job_t job = _starpu_get_job_associated_to_task(l->task);
					events[i++] = job->event;

					struct starpu_task_wrapper_list *prev = l;
					l = l->next;
					free(prev);
				}
#ifdef STARPU_USE_FXT
				/* Declare all dependencies with ghost readers */
				starpu_job_t post_sync_job = _starpu_get_job_associated_to_task(post_sync_task);

				struct starpu_jobid_list *ghost_readers_id = handle->last_submitted_ghost_readers_id;
				while (ghost_readers_id)
				{
					unsigned long id = ghost_readers_id->id;
					STARPU_TRACE_GHOST_TASK_DEPS(id, post_sync_job->job_id);

					struct starpu_jobid_list *prev = ghost_readers_id;
					ghost_readers_id = ghost_readers_id->next;
					free(prev);
				}
				handle->last_submitted_ghost_readers_id = NULL;
#endif

				handle->last_submitted_readers = NULL;
				handle->last_submitted_writer = post_sync_task;
	
				starpu_task_declare_deps_array(pre_sync_task, nreaders, events);
			}
	
		}
		else {
			_STARPU_DEP_DEBUG("R %p\n", handle);
			/* Add a reader */
			STARPU_ASSERT(pre_sync_task);
			STARPU_ASSERT(post_sync_task);
	
			/* Add this task to the list of readers */
			struct starpu_task_wrapper_list *link = malloc(sizeof(struct starpu_task_wrapper_list));
			link->task = post_sync_task;
			link->next = handle->last_submitted_readers;
			handle->last_submitted_readers = link;

			/* This task depends on the previous writer if any */
			if (handle->last_submitted_writer)
			{
				_STARPU_DEP_DEBUG("RAW %p\n", handle);
				starpu_job_t job = _starpu_get_job_associated_to_task(handle->last_submitted_writer);
				starpu_task_declare_deps_array(pre_sync_task, 1, &job->event);
			}

#ifdef STARPU_USE_FXT
			/* There was perhaps no last submitted writer but a
			 * ghost one, we should report that here, and keep the
			 * ghost writer valid */
			if (handle->last_submitted_ghost_writer_id_is_valid)
			{
				starpu_job_t post_sync_job = _starpu_get_job_associated_to_task(post_sync_task);
				STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_writer_id, post_sync_job->job_id);
			}
#endif
		}
	
		handle->last_submitted_mode = mode;
	}
}
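
A hedged sketch of how a submission path could invoke the function above for one data handle of a regular task, modelled on the starpu_data_acquire callers in this listing (the helper name is hypothetical; an ordinary codelet task plays both the pre- and post-sync roles). For a STARPU_W access the task ends up depending on the previous writer (WAW) or on all recorded readers (WAR); for a read access it depends on the previous writer only (RAW) and is appended to the reader list.

static void declare_implicit_deps_for_handle(struct starpu_task *task,
				starpu_data_handle handle, starpu_access_mode mode)
{
	/* The function requires the sequential consistency mutex to be held. */
	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);

	/* The same task is used as pre- and post-sync task. */
	_starpu_detect_implicit_data_deps_with_handle(task, task, handle, mode);

	PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
}
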
Example #5
/* NB: We maintain a list of "ghost deps" in case FXT is enabled. Ghost
 * dependencies are the dependencies that are implicitly enforced by StarPU
 * even if they do not imply a real dependency. For instance in the following
 * sequence, f(Ar) g(Ar) h(Aw), we expect to have h depend on both f and g, but
 * if h is submitted after the termination of f or g, StarPU will not create a
 * dependency as this is not needed anymore. */
void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *task, starpu_data_handle handle)
{
	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);

	if (handle->sequential_consistency)
	{

		/* If this is the last writer, there is no point in adding
		 * extra deps to a task that does not exist anymore */
		if (task == handle->last_submitted_writer)
		{
			handle->last_submitted_writer = NULL;
			
#ifdef STARPU_USE_FXT
			/* Save the previous writer as the ghost last writer */
			handle->last_submitted_ghost_writer_id_is_valid = 1;
			starpu_job_t ghost_job = _starpu_get_job_associated_to_task(task);
			handle->last_submitted_ghost_writer_id = ghost_job->job_id;
#endif
			
		}
		
		/* XXX can a task be both the last writer associated to a data
		 * and be in its list of readers ? If not, we should not go
		 * through the entire list once we have detected it was the
		 * last writer. */

		/* Same if this is one of the readers: we go through the list
		 * of readers and remove the task if it is found. */
		struct starpu_task_wrapper_list *l;
		l = handle->last_submitted_readers;
		struct starpu_task_wrapper_list *prev = NULL;
		while (l)
		{
			struct starpu_task_wrapper_list *next = l->next;

			if (l->task == task)
			{
				/* If we found the task in the reader list */
				free(l);

#ifdef STARPU_USE_FXT
				/* Save the job id of the reader task in the ghost reader linked list */
				starpu_job_t ghost_reader_job = _starpu_get_job_associated_to_task(task);
				struct starpu_jobid_list *link = malloc(sizeof(struct starpu_jobid_list));
				STARPU_ASSERT(link);
				link->next = handle->last_submitted_ghost_readers_id;
				link->id = ghost_reader_job->job_id; 
				handle->last_submitted_ghost_readers_id = link;
#endif

				if (prev)
				{
					prev->next = next;
				}
				else {
					/* This is the first element of the list */
					handle->last_submitted_readers = next;
				}

				/* XXX can we really find the same task again
				 * once we have found it ? Otherwise, we should
				 * avoid going through the entire list and stop
				 * as soon as we find the task. TODO: check how
				 * duplicate dependencies are treated. */
			}
			else {
				prev = l;
			}

			l = next;
		}
	}

	PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
}
Example #6
/* task depends on the tasks in task array */
void _starpu_task_declare_deps_array(struct starpu_task *task, unsigned ndeps, struct starpu_task *task_array[], int check)
{
	if (ndeps == 0)
		return;

	struct _starpu_job *job;

	job = _starpu_get_job_associated_to_task(task);

	STARPU_PTHREAD_MUTEX_LOCK(&job->sync_mutex);
	if (check)
		STARPU_ASSERT_MSG(
				!job->submitted || !task->destroy || task->detach
#ifdef STARPU_OPENMP
				|| job->continuation
#endif
				, "Task dependencies have to be set before submission (submitted %u destroy %d detach %d)", job->submitted, task->destroy, task->detach);
	else
		STARPU_ASSERT_MSG(job->terminated <= 1, "Task dependencies have to be set before termination (terminated %u)", job->terminated);

	struct _starpu_cg *cg = create_cg_task(ndeps, job);
	STARPU_PTHREAD_MUTEX_UNLOCK(&job->sync_mutex);

	unsigned i;
	for (i = 0; i < ndeps; i++)
	{
		struct starpu_task *dep_task = task_array[i];

		struct _starpu_job *dep_job;
		struct _starpu_cg *back_cg = NULL;

		dep_job = _starpu_get_job_associated_to_task(dep_task);

		STARPU_ASSERT_MSG(dep_job != job, "A task must not depend on itself.");
		STARPU_PTHREAD_MUTEX_LOCK(&dep_job->sync_mutex);
		if (check)
		{
			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->destroy || dep_job->task->detach, "Unless it is not to be destroyed automatically, a task's dependencies have to be set before submission");
			STARPU_ASSERT_MSG(dep_job->submitted != 2, "For resubmitted tasks, dependencies have to be set before the first re-submission");
			STARPU_ASSERT_MSG(!dep_job->submitted || !dep_job->task->regenerate, "For regenerated tasks, dependencies have to be set before first submission");
		}
		else
			STARPU_ASSERT_MSG(dep_job->terminated <= 1, "Task dependencies have to be set before termination (terminated %u)", dep_job->terminated);
		if (dep_job->task->regenerate)
		{
			/* Make sure we don't regenerate the dependency before this task is finished */
			back_cg = create_cg_task(1, dep_job);
			/* Just do not take that dependency into account for the first submission */
			dep_job->job_successors.ndeps_completed++;
		}
		STARPU_PTHREAD_MUTEX_UNLOCK(&dep_job->sync_mutex);

		_STARPU_TRACE_TASK_DEPS(dep_job, job);
		_starpu_bound_task_dep(job, dep_job);
#ifdef HAVE_AYUDAME_H
		if (AYU_event && check)
		{
			uintptr_t AYU_data[3] = {dep_job->job_id, 0, 0};
			AYU_event(AYU_ADDDEPENDENCY, job->job_id, AYU_data);
		}
#endif

		_starpu_task_add_succ(dep_job, cg);
		if (dep_job->task->regenerate)
			_starpu_task_add_succ(job, back_cg);
	}
}
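
A hedged usage sketch of the public counterpart, assuming starpu_task_declare_deps_array(task, ndeps, task_array) forwards to the helper above with check == 1 and that this StarPU revision uses the one-argument starpu_task_submit(); the task names are illustrative. Dependencies must be declared before the dependent task is submitted.

void chain_example(struct starpu_task *task_a, struct starpu_task *task_b,
		   struct starpu_task *task_c)
{
	struct starpu_task *deps[2] = { task_a, task_b };

	/* task_c will not start before task_a and task_b have terminated. */
	starpu_task_declare_deps_array(task_c, 2, deps);

	int ret = starpu_task_submit(task_c);
	STARPU_ASSERT(!ret);
}
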
Example #7
/* The data must be released by calling starpu_data_release later on */
int starpu_data_acquire(starpu_data_handle handle, starpu_access_mode mode)
{
	STARPU_ASSERT(handle);

	/* it is forbidden to call this function from a callback or a codelet */
	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
		return -EDEADLK;

	struct user_interaction_wrapper wrapper =
	{
		.handle = handle,
		.mode = mode,
		.node = 0, // unused
		.cond = PTHREAD_COND_INITIALIZER,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.finished = 0
	};

//	_STARPU_DEBUG("TAKE sequential_consistency_mutex starpu_data_acquire\n");
	PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
	int sequential_consistency = handle->sequential_consistency;
	if (sequential_consistency)
	{
		wrapper.pre_sync_task = starpu_task_create();

		wrapper.post_sync_task = starpu_task_create();

#ifdef STARPU_USE_FXT
		starpu_job_t job = _starpu_get_job_associated_to_task(wrapper.pre_sync_task);
		job->model_name = "acquire_pre";
		job = _starpu_get_job_associated_to_task(wrapper.post_sync_task);
		job->model_name = "acquire_post";
#endif

		_starpu_detect_implicit_data_deps_with_handle(wrapper.pre_sync_task, wrapper.post_sync_task, handle, mode);
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);

		/* TODO detect if this is superfluous */
		wrapper.pre_sync_task->synchronous = 1;
		int ret = starpu_task_submit(wrapper.pre_sync_task, NULL);
		STARPU_ASSERT(!ret);
		/* starpu_event event;
         int ret = starpu_task_submit(wrapper.pre_sync_task, &event);
		   STARPU_ASSERT(!ret);
         starpu_event_wait(event);
         starpu_event_release(event);
      */
	}
	else {
		PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
	}

	/* We try to get the data; if we do not succeed immediately, we register a
	 * callback function that will be executed automatically when the data is
	 * available again; otherwise we fetch the data directly */
	if (!_starpu_attempt_to_submit_data_request_from_apps(handle, mode,
			_starpu_data_acquire_continuation, &wrapper))
	{
		/* no one has locked this data yet, so we proceed immediately */
		int ret = _starpu_fetch_data_on_node(handle, 0, mode, 0, NULL, NULL);
		STARPU_ASSERT(!ret);
	}
	else {
		PTHREAD_MUTEX_LOCK(&wrapper.lock);
		while (!wrapper.finished)
			PTHREAD_COND_WAIT(&wrapper.cond, &wrapper.lock);
		PTHREAD_MUTEX_UNLOCK(&wrapper.lock);
	}

	/* At that moment, the caller holds a reference to the piece of data.
	 * We enqueue the "post" sync task in the list associated to the handle
	 * so that it is submitted by the starpu_data_release
	 * function. */
	_starpu_add_post_sync_tasks(wrapper.post_sync_task, handle);

	return 0;
}

/* This function must be called after starpu_data_acquire so that the
 * application releases the data */
void starpu_data_release(starpu_data_handle handle)
{
	STARPU_ASSERT(handle);

	/* The application can now release the rw-lock */
	_starpu_release_data_on_node(handle, 0, 0);

	/* In case there are some implicit dependencies, unlock the "post sync" tasks */
	_starpu_unlock_post_sync_tasks(handle);
}
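
A minimal blocking usage sketch of the acquire/release pair above, assuming the handle was registered over a buffer still owned by the application (the names are illustrative). It must not be called from a codelet or a callback, as the acquire function itself checks.

float sum_example(starpu_data_handle handle, float *buffer, unsigned nx)
{
	/* Block until all previously submitted tasks on the handle are done. */
	int ret = starpu_data_acquire(handle, STARPU_R);
	STARPU_ASSERT(!ret);

	float sum = 0.0f;
	unsigned i;
	for (i = 0; i < nx; i++)
		sum += buffer[i];

	/* Let the implicitly dependent "post sync" tasks proceed. */
	starpu_data_release(handle);

	return sum;
}
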
Example #8
void *_starpu_cpu_worker(void *arg)
{
   struct starpu_worker_s *cpu_arg = arg;
   unsigned memnode = cpu_arg->memory_node;
   int workerid = cpu_arg->workerid;
   int devid = cpu_arg->devid;

#ifdef STARPU_USE_FXT
   _starpu_fxt_register_thread(cpu_arg->bindid);
#endif
   STARPU_TRACE_WORKER_INIT_START(STARPU_FUT_CPU_KEY, devid, memnode);

   _starpu_bind_thread_on_cpu(cpu_arg->config, cpu_arg->bindid);

   _STARPU_DEBUG("cpu worker %d is ready on logical cpu %d\n", devid, cpu_arg->bindid);

   _starpu_set_local_memory_node_key(&memnode);

   _starpu_set_local_worker_key(cpu_arg);

   snprintf(cpu_arg->name, 32, "CPU %d", devid);

   cpu_arg->status = STATUS_UNKNOWN;

   STARPU_TRACE_WORKER_INIT_END

   /* tell the main thread that we are ready */
   PTHREAD_MUTEX_LOCK(&cpu_arg->mutex);
   cpu_arg->worker_is_initialized = 1;
   PTHREAD_COND_SIGNAL(&cpu_arg->ready_cond);
   PTHREAD_MUTEX_UNLOCK(&cpu_arg->mutex);

   starpu_job_t j;
   int res;

   while (_starpu_machine_is_running())
   {
      STARPU_TRACE_START_PROGRESS(memnode);
      _starpu_datawizard_progress(memnode, 1);
      STARPU_TRACE_END_PROGRESS(memnode);

      _starpu_execute_registered_progression_hooks();

      PTHREAD_MUTEX_LOCK(cpu_arg->sched_mutex);

      /* perhaps there is some local task to be executed first */
      j = _starpu_pop_local_task(cpu_arg);

      /* otherwise ask a task to the scheduler */
      if (!j)
      {
         struct starpu_task *task = _starpu_pop_task();
         if (task)
            j = _starpu_get_job_associated_to_task(task);
      }

      if (j == NULL) 
      {
         if (_starpu_worker_can_block(memnode))
            _starpu_block_worker(workerid, cpu_arg->sched_cond, cpu_arg->sched_mutex);

         PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);

         continue;
      }

      PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);

      /* can a cpu perform that task ? */
      if (!STARPU_CPU_MAY_PERFORM(j)) 
      {
         /* put it at the end of the queue ... XXX */
         _starpu_push_task(j, 0);
         continue;
      }

      _starpu_set_current_task(j->task);

      res = execute_job_on_cpu(j, cpu_arg);

      _starpu_set_current_task(NULL);

      if (res) {
         switch (res) {
            case -EAGAIN:
               _starpu_push_task(j, 0);
               continue;
            default: 
               assert(0);
         }
      }

      _starpu_handle_job_termination(j, 0);
   }

   STARPU_TRACE_WORKER_DEINIT_START

   /* In case there remains some memory that was automatically
    * allocated by StarPU, we release it now. Note that data
    * coherency is not maintained anymore at that point ! */
   _starpu_free_all_automatically_allocated_buffers(memnode);

   STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CPU_KEY);

   pthread_exit(NULL);
}
int _starpu_push_task_to_workers(struct starpu_task *task)
{
	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(task->sched_ctx);
	unsigned nworkers = 0;

	_STARPU_TRACE_JOB_PUSH(task, task->priority > 0);

	/* if the context still does not have workers, put the task back to its place in
	   the empty ctx list */
	if(!sched_ctx->is_initial_sched)
	{
		/* if none of the workers in the ctx is able to execute the task,
		   we consider the ctx empty */
		nworkers = _starpu_nworkers_able_to_execute_task(task, sched_ctx);

		if (nworkers == 0)
		{
			STARPU_PTHREAD_MUTEX_LOCK(&sched_ctx->empty_ctx_mutex);
			starpu_task_list_push_back(&sched_ctx->empty_ctx_tasks, task);
			STARPU_PTHREAD_MUTEX_UNLOCK(&sched_ctx->empty_ctx_mutex);
#ifdef STARPU_USE_SC_HYPERVISOR
			if(sched_ctx != NULL && sched_ctx->id != 0 && sched_ctx->perf_counters != NULL 
			   && sched_ctx->perf_counters->notify_empty_ctx)
			{
				_STARPU_TRACE_HYPERVISOR_BEGIN();
				sched_ctx->perf_counters->notify_empty_ctx(sched_ctx->id, task);
				_STARPU_TRACE_HYPERVISOR_END();
			}
#endif

			return -EAGAIN;
		}
	}

	_starpu_profiling_set_task_push_start_time(task);

	int ret = 0;
	if (STARPU_UNLIKELY(task->execute_on_a_specific_worker))
	{
		unsigned node = starpu_worker_get_memory_node(task->workerid);
		if (starpu_get_prefetch_flag())
			starpu_prefetch_task_input_on_node(task, node);

		ret = _starpu_push_task_on_specific_worker(task, task->workerid);
	}
	else
	{
		struct _starpu_machine_config *config = _starpu_get_machine_config();

		/* When a task can only be executed on a given arch and we have
		 * only one memory node for that arch, we can systematically
		 * prefetch before the scheduling decision. */
		if (starpu_get_prefetch_flag())
		{
			if (task->cl->where == STARPU_CPU && config->cpus_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->cpus_nodeid);
			else if (task->cl->where == STARPU_CUDA && config->cuda_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->cuda_nodeid);
			else if (task->cl->where == STARPU_OPENCL && config->opencl_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->opencl_nodeid);
			else if (task->cl->where == STARPU_MIC && config->mic_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->mic_nodeid);
			else if (task->cl->where == STARPU_SCC && config->scc_nodeid >= 0)
				starpu_prefetch_task_input_on_node(task, config->scc_nodeid);
		}

		if(!sched_ctx->sched_policy)
		{
			/* Note: we have to call that early, or else the task may have
			 * disappeared already */
			starpu_push_task_end(task);
			if(!sched_ctx->awake_workers)
				ret = _starpu_push_task_on_specific_worker(task, sched_ctx->main_master);
			else
			{
				struct starpu_worker_collection *workers = sched_ctx->workers;
				
				struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
				job->task_size = workers->nworkers;
				job->combined_workerid = -1; // it's a ctx, not a combined worker
				job->active_task_alias_count = 0;

				STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, workers->nworkers);
				STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, workers->nworkers);
				job->after_work_busy_barrier = workers->nworkers;

				unsigned workerid;
				struct starpu_sched_ctx_iterator it;
				if(workers->init_iterator)
					workers->init_iterator(workers, &it);

				while(workers->has_next(workers, &it))
				{
					workerid = workers->get_next(workers, &it);
					struct starpu_task *alias = starpu_task_dup(task);
					alias->destroy = 1;
					ret |= _starpu_push_task_on_specific_worker(alias, workerid);
				}
			}
		}
		else
		{
			STARPU_ASSERT(sched_ctx->sched_policy->push_task);
			/* check out if there are any workers in the context */
			starpu_pthread_rwlock_t *changing_ctx_mutex = _starpu_sched_ctx_get_changing_ctx_mutex(sched_ctx->id);
			STARPU_PTHREAD_RWLOCK_RDLOCK(changing_ctx_mutex);
			nworkers = starpu_sched_ctx_get_nworkers(sched_ctx->id);
			if (nworkers == 0)
				ret = -1;
			else
			{
				_STARPU_TRACE_WORKER_SCHEDULING_PUSH;
				ret = sched_ctx->sched_policy->push_task(task);
				_STARPU_TRACE_WORKER_SCHEDULING_POP;
			}
			STARPU_PTHREAD_RWLOCK_UNLOCK(changing_ctx_mutex);
		}

		if(ret == -1)
		{
			fprintf(stderr, "repush task \n");
			_STARPU_TRACE_JOB_POP(task, task->priority > 0);
			ret = _starpu_push_task_to_workers(task);
		}
	}
	/* Note: from here, the task might have been destroyed already! */
	_STARPU_LOG_OUT();
	return ret;

}
/* Enqueue a task into the list of tasks explicitly attached to a worker. In
 * case workerid identifies a combined worker, a task will be enqueued into
 * each worker of the combination. */
static int _starpu_push_task_on_specific_worker(struct starpu_task *task, int workerid)
{
	int nbasic_workers = (int)starpu_worker_get_count();

	/* Is this a basic worker or a combined worker ? */
	int is_basic_worker = (workerid < nbasic_workers);

	unsigned memory_node;
	struct _starpu_worker *worker = NULL;
	struct _starpu_combined_worker *combined_worker = NULL;

	if (is_basic_worker)
	{
		worker = _starpu_get_worker_struct(workerid);
		memory_node = worker->memory_node;
	}
	else
	{
		combined_worker = _starpu_get_combined_worker_struct(workerid);
		memory_node = combined_worker->memory_node;
	}

	if (use_prefetch)
		starpu_prefetch_task_input_on_node(task, memory_node);

	if (is_basic_worker)
		_starpu_push_task_on_specific_worker_notify_sched(task, worker, workerid, workerid);
	else
	{
		/* Notify all workers of the combined worker */
		int worker_size = combined_worker->worker_size;
		int *combined_workerid = combined_worker->combined_workerid;

		int j;
		for (j = 0; j < worker_size; j++)
		{
			int subworkerid = combined_workerid[j];
			_starpu_push_task_on_specific_worker_notify_sched(task, _starpu_get_worker_struct(subworkerid), subworkerid, workerid);
		}
	}

#ifdef STARPU_USE_SC_HYPERVISOR
	starpu_sched_ctx_call_pushed_task_cb(workerid, task->sched_ctx);
#endif //STARPU_USE_SC_HYPERVISOR
	unsigned i;
	if (is_basic_worker)
	{
		unsigned node = starpu_worker_get_memory_node(workerid);
		if (_starpu_task_uses_multiformat_handles(task))
		{
			unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
			for (i = 0; i < nbuffers; i++)
			{
				struct starpu_task *conversion_task;
				starpu_data_handle_t handle;

				handle = STARPU_TASK_GET_HANDLE(task, i);
				if (!_starpu_handle_needs_conversion_task(handle, node))
					continue;

				conversion_task = _starpu_create_conversion_task(handle, node);
				conversion_task->mf_skip = 1;
				conversion_task->execute_on_a_specific_worker = 1;
				conversion_task->workerid = workerid;
				_starpu_task_submit_conversion_task(conversion_task, workerid);
				//_STARPU_DEBUG("Pushing a conversion task\n");
			}

			for (i = 0; i < nbuffers; i++)
			{
				starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, i);
				handle->mf_node = node;
			}
		}
//		if(task->sched_ctx != _starpu_get_initial_sched_ctx()->id)

		if(task->priority > 0)
			return _starpu_push_local_task(worker, task, 1);
		else
			return _starpu_push_local_task(worker, task, 0);
	}
	else
	{
		/* This is a combined worker so we create task aliases */
		int worker_size = combined_worker->worker_size;
		int *combined_workerid = combined_worker->combined_workerid;

		int ret = 0;

		struct _starpu_job *job = _starpu_get_job_associated_to_task(task);
		job->task_size = worker_size;
		job->combined_workerid = workerid;
		job->active_task_alias_count = 0;

		STARPU_PTHREAD_BARRIER_INIT(&job->before_work_barrier, NULL, worker_size);
		STARPU_PTHREAD_BARRIER_INIT(&job->after_work_barrier, NULL, worker_size);
		job->after_work_busy_barrier = worker_size;

		/* Note: we have to call that early, or else the task may have
		 * disappeared already */
		starpu_push_task_end(task);

		int j;
		for (j = 0; j < worker_size; j++)
		{
			struct starpu_task *alias = starpu_task_dup(task);
			alias->destroy = 1;

			worker = _starpu_get_worker_struct(combined_workerid[j]);
			ret |= _starpu_push_local_task(worker, alias, 0);
		}

		return ret;
	}
}