Example #1
static void lock_all_subtree(starpu_data_handle handle)
{
	if (handle->nchildren == 0)
	{
		/* this is a leaf */
		while (_starpu_spin_trylock(&handle->header_lock))
			_starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
	}
	else
	{
		/* lock all sub-subtrees children */
		unsigned child;
		for (child = 0; child < handle->nchildren; child++)
		{
			lock_all_subtree(&handle->children[child]);
		}
	}
}
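Example #1 walks a partitioned data handle recursively and takes the header lock of every leaf. While a leaf's lock is contended, the worker keeps calling _starpu_datawizard_progress() so pending data transfers still advance instead of the thread spinning idly. A minimal self-contained sketch of that trylock-and-progress idiom using plain pthreads (do_useful_work() is a hypothetical stand-in for the progress call, not StarPU code):

#include <pthread.h>

static pthread_spinlock_t lock;

/* Hypothetical stand-in for _starpu_datawizard_progress(): any useful
 * work the thread can do while the lock is contended. */
static void do_useful_work(void) { }

static void lock_with_progress(void)
{
	/* pthread_spin_trylock() returns 0 on success, EBUSY when contended */
	while (pthread_spin_trylock(&lock) != 0)
		do_useful_work();
}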
Example #2
int _starpu_wait_data_request_completion(starpu_data_request_t r, unsigned may_alloc)
{
	int retval;
	int do_delete = 0;

	uint32_t local_node = _starpu_get_local_memory_node();

	do {
		_starpu_spin_lock(&r->lock);

		if (r->completed)
			break;

		_starpu_spin_unlock(&r->lock);

#ifndef STARPU_NON_BLOCKING_DRIVERS
		_starpu_wake_all_blocked_workers_on_node(r->handling_node);
#endif

		_starpu_datawizard_progress(local_node, may_alloc);

	} while (1);

	retval = r->retval;
	if (retval)
		_STARPU_DISP("REQUEST %p COMPLETED (retval %d) !\n", r, r->retval);

	r->refcnt--;

	/* if nobody is waiting on that request, we can get rid of it */
	if (r->refcnt == 0)
		do_delete = 1;

	_starpu_spin_unlock(&r->lock);

	if (do_delete)
		starpu_data_request_destroy(r);

	return retval;
}
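Example #2 layers the same progress idiom under a completion wait: the caller polls r->completed under the request lock and drives progress while it is unset. Note that the break leaves the loop with r->lock still held, which is what makes the later reads of retval and refcnt safe; the request is destroyed only once its reference count drops to zero. A reduced sketch of that shape (the struct and helper names are illustrative, not the StarPU API):

#include <pthread.h>
#include <stdlib.h>

/* Illustrative request type mirroring the fields used above; progress()
 * is a hypothetical placeholder for _starpu_datawizard_progress(). */
struct request
{
	pthread_spinlock_t lock;
	unsigned completed;
	int retval;
	unsigned refcnt;
};

static void progress(void) { }

static int wait_completion(struct request *r)
{
	int retval, last;

	for (;;)
	{
		pthread_spin_lock(&r->lock);
		if (r->completed)
			break;			/* loop exits with the lock held */
		pthread_spin_unlock(&r->lock);
		progress();			/* push pending transfers forward */
	}

	retval = r->retval;			/* still under the lock */
	last = (--r->refcnt == 0);
	pthread_spin_unlock(&r->lock);

	if (last)
		free(r);			/* nobody else references r */
	return retval;
}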
Example #3
/* No lock is held, this acquires and releases the handle header lock */
static unsigned _starpu_attempt_to_submit_data_request(unsigned request_from_codelet,
						       starpu_data_handle_t handle, enum starpu_data_access_mode mode,
						       void (*callback)(void *), void *argcb,
						       struct _starpu_job *j, unsigned buffer_index)
{
	if (handle->arbiter)
		return _starpu_attempt_to_submit_arbitered_data_request(request_from_codelet, handle, mode, callback, argcb, j, buffer_index);

	if (mode == STARPU_RW)
		mode = STARPU_W;

	/* Take the lock protecting the header. We try to do some progression
	 * in case this is called from a worker, otherwise we just wait for the
	 * lock to be available. */
	if (request_from_codelet)
	{
		int cpt = 0;
		while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
		{
			cpt++;
			_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
		}
		if (cpt == STARPU_SPIN_MAXTRY)
			_starpu_spin_lock(&handle->header_lock);
	}
	else
	{
		_starpu_spin_lock(&handle->header_lock);
	}

	/* If a reduction is pending and this request is not part of it, the
	 * request is frozen and queued on the normal list; requests that
	 * belong to the reduction go through reduction_req_list instead. */
	unsigned pending_reduction = (handle->reduction_refcnt > 0);
	unsigned frozen = 0;

	/* If we are currently performing a reduction, we freeze any request
	 * that is not explicitly a reduction task. */
	unsigned is_a_reduction_task = (request_from_codelet && j->reduction_task);

	if (pending_reduction && !is_a_reduction_task)
		frozen = 1;

	/* If nobody is currently accessing the piece of data, or if this
	 * access is not a write and has the same mode as the current one,
	 * we can proceed. */
	unsigned put_in_list = 1;

	enum starpu_data_access_mode previous_mode = handle->current_mode;

	if (!frozen && ((handle->refcnt == 0) || (!(mode == STARPU_W) && (handle->current_mode == mode))))
	{
		/* Detect whether this is the end of a reduction phase. We
		 * don't want to start multiple reductions of the same
		 * handle at the same time! */

		if ((handle->reduction_refcnt == 0) && (previous_mode == STARPU_REDUX) && (mode != STARPU_REDUX))
		{
			_starpu_data_end_reduction_mode(handle);

			/* Since we need to perform a mode change, we freeze
			 * the request if needed. */
			put_in_list = (handle->reduction_refcnt > 0);
		}
		else
		{
			put_in_list = 0;
		}
	}

	if (put_in_list)
	{
		/* there cannot be multiple writers or a new writer
		 * while the data is in read mode */

		handle->busy_count++;
		/* enqueue the request */
		struct _starpu_data_requester *r = _starpu_data_requester_new();
		r->mode = mode;
		r->is_requested_by_codelet = request_from_codelet;
		r->j = j;
		r->buffer_index = buffer_index;
		r->ready_data_callback = callback;
		r->argcb = argcb;

		/* We put the requester in a specific list if this is a reduction task */
		struct _starpu_data_requester_list *req_list =
			is_a_reduction_task?&handle->reduction_req_list:&handle->req_list;

		_starpu_data_requester_list_push_back(req_list, r);

		/* failed */
		put_in_list = 1;
	}
	else
	{
		handle->refcnt++;
		handle->busy_count++;

		/* Do not write to handle->current_mode if it is already
		 * R. This avoids a spurious warning from helgrind when
		 * the following happens:
		 * acquire(R) in thread A
		 * acquire(R) in thread B
		 * release_data_on_node() in thread A
		 * helgrind would shout that the latter reads current_mode
		 * unsafely.
		 *
		 * This actually basically explains helgrind that it is a
		 * shared R acquisition.
		 */
		if (mode != STARPU_R || handle->current_mode != mode)
			handle->current_mode = mode;

		if ((mode == STARPU_REDUX) && (previous_mode != STARPU_REDUX))
			_starpu_data_start_reduction_mode(handle);

		/* success */
		put_in_list = 0;
	}

	_starpu_spin_unlock(&handle->header_lock);
	return put_in_list;
}
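Example #3 is the admission path for data requests: an access is granted immediately when nothing currently holds the data, or when it is a compatible access (not a write, same mode as the one in progress); everything else, including requests frozen by a pending reduction, gets queued on the handle. A reduced, self-contained sketch of just that readers/writer decision (field and helper names are illustrative; the reduction and callback machinery is stripped out):

enum mode { MODE_R, MODE_W, MODE_REDUX };

struct handle
{
	unsigned refcnt;	/* number of current accessors */
	enum mode current_mode;
};

/* Returns 0 if the caller may proceed immediately, 1 if it must enqueue
 * a requester: grant when the data is idle, or when the access is not a
 * write and matches the access already in progress. */
static int try_acquire(struct handle *h, enum mode m)
{
	if (h->refcnt == 0 || (m != MODE_W && h->current_mode == m))
	{
		h->refcnt++;
		h->current_mode = m;
		return 0;
	}
	return 1;
}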
Example #4
void *_starpu_cpu_worker(void *arg)
{
   struct starpu_worker_s *cpu_arg = arg;
   unsigned memnode = cpu_arg->memory_node;
   int workerid = cpu_arg->workerid;
   int devid = cpu_arg->devid;

#ifdef STARPU_USE_FXT
   _starpu_fxt_register_thread(cpu_arg->bindid);
#endif
   STARPU_TRACE_WORKER_INIT_START(STARPU_FUT_CPU_KEY, devid, memnode);

   _starpu_bind_thread_on_cpu(cpu_arg->config, cpu_arg->bindid);

   _STARPU_DEBUG("cpu worker %d is ready on logical cpu %d\n", devid, cpu_arg->bindid);

   _starpu_set_local_memory_node_key(&memnode);

   _starpu_set_local_worker_key(cpu_arg);

   snprintf(cpu_arg->name, 32, "CPU %d", devid);

   cpu_arg->status = STATUS_UNKNOWN;

   STARPU_TRACE_WORKER_INIT_END

   /* tell the main thread that we are ready */
   PTHREAD_MUTEX_LOCK(&cpu_arg->mutex);
   cpu_arg->worker_is_initialized = 1;
   PTHREAD_COND_SIGNAL(&cpu_arg->ready_cond);
   PTHREAD_MUTEX_UNLOCK(&cpu_arg->mutex);

   starpu_job_t j;
   int res;

   while (_starpu_machine_is_running())
   {
      STARPU_TRACE_START_PROGRESS(memnode);
      _starpu_datawizard_progress(memnode, 1);
      STARPU_TRACE_END_PROGRESS(memnode);

      _starpu_execute_registered_progression_hooks();

      PTHREAD_MUTEX_LOCK(cpu_arg->sched_mutex);

      /* perhaps there is some local task to be executed first */
      j = _starpu_pop_local_task(cpu_arg);

      /* otherwise ask a task to the scheduler */
      if (!j)
      {
         struct starpu_task *task = _starpu_pop_task();
         if (task)
            j = _starpu_get_job_associated_to_task(task);
      }

      if (j == NULL) 
      {
         if (_starpu_worker_can_block(memnode))
            _starpu_block_worker(workerid, cpu_arg->sched_cond, cpu_arg->sched_mutex);

         PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);

         continue;
      }

      PTHREAD_MUTEX_UNLOCK(cpu_arg->sched_mutex);

      /* can a cpu perform that task? */
      if (!STARPU_CPU_MAY_PERFORM(j))
      {
         /* put it at the end of the queue ... XXX */
         _starpu_push_task(j, 0);
         continue;
      }

      _starpu_set_current_task(j->task);

      res = execute_job_on_cpu(j, cpu_arg);

      _starpu_set_current_task(NULL);

      if (res) {
         switch (res) {
            case -EAGAIN:
               _starpu_push_task(j, 0);
               continue;
            default: 
               assert(0);
         }
      }

      _starpu_handle_job_termination(j, 0);
   }

   STARPU_TRACE_WORKER_DEINIT_START

   /* In case there remains some memory that was automatically
    * allocated by StarPU, we release it now. Note that data
    * coherency is not maintained anymore at that point! */
   _starpu_free_all_automatically_allocated_buffers(memnode);

   STARPU_TRACE_WORKER_DEINIT_END(STARPU_FUT_CPU_KEY);

   pthread_exit(NULL);
}
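Example #4 shows where _starpu_datawizard_progress() sits in a CPU worker's lifetime: once per loop iteration, before the worker looks for a task, so data transfers keep moving even while the worker is merely polling for work. A skeleton of that loop (every name below is a hypothetical placeholder standing in for the corresponding StarPU internal):

while (machine_is_running())
{
	datawizard_progress(memnode);	/* always advance data transfers */

	job = pop_local_task(worker);	/* prefer worker-local tasks */
	if (!job)
		job = pop_scheduled_task();	/* else ask the scheduler */
	if (!job)
	{
		maybe_block(worker);	/* sleep until work is signalled */
		continue;
	}

	if (execute(job) == -EAGAIN)
		push_task(job);		/* transient failure: requeue */
	else
		handle_termination(job);
}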