Example #1
void
GOACC_parallel (int device, void (*fn) (void *), const void *offload_table,
		size_t mapnum, void **hostaddrs, size_t *sizes,
		unsigned short *kinds,
		int num_gangs, int num_workers, int vector_length,
		int async, int num_waits, ...)
{
  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
  va_list ap;
  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;
  struct target_mem_desc *tgt;
  void **devaddrs;
  unsigned int i;
  struct splay_tree_key_s k;
  splay_tree_key tgt_fn_key;
  void (*tgt_fn);

  if (num_gangs != 1)
    gomp_fatal ("num_gangs (%d) different from one is not yet supported",
		num_gangs);
  if (num_workers != 1)
    gomp_fatal ("num_workers (%d) different from one is not yet supported",
		num_workers);

  gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
	      __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async);

  select_acc_device (device);

  thr = goacc_thread ();
  acc_dev = thr->dev;

  /* Host fallback if "if" clause is false or if the current device is set to
     the host.  */
  if (host_fallback)
    {
      goacc_save_and_set_bind (acc_device_host);
      fn (hostaddrs);
      goacc_restore_bind ();
      return;
    }
  else if (acc_device_type (acc_dev->type) == acc_device_host)
    {
      fn (hostaddrs);
      return;
    }

  va_start (ap, num_waits);
  
  if (num_waits > 0)
    goacc_wait (async, num_waits, ap);

  va_end (ap);

  acc_dev->openacc.async_set_async_func (async);

  if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
    {
      k.host_start = (uintptr_t) fn;
      k.host_end = k.host_start + 1;
      gomp_mutex_lock (&acc_dev->mem_map.lock);
      tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k);
      gomp_mutex_unlock (&acc_dev->mem_map.lock);

      if (tgt_fn_key == NULL)
	gomp_fatal ("target function wasn't mapped");

      tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
    }
  else
    tgt_fn = (void (*)) fn;

  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
		       false);

  devaddrs = alloca (sizeof (void *) * mapnum);
  for (i = 0; i < mapnum; i++)
    devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
			    + tgt->list[i]->tgt_offset);

  acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
			      num_gangs, num_workers, vector_length, async,
			      tgt);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (tgt, true);
  else
    {
      gomp_copy_from_async (tgt);
      acc_dev->openacc.register_async_cleanup_func (tgt);
    }

  acc_dev->openacc.async_set_async_func (acc_async_sync);
}
Example #2
void
GOMP_atomic_end (void)
{
  gomp_mutex_unlock (&atomic_lock);
}
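
GOMP_atomic_end releases the global lock taken by GOMP_atomic_start; the pair is libgomp's fallback for "#pragma omp atomic" when the compiler cannot emit a hardware atomic operation. A minimal sketch of user code that can take this fallback path (whether it actually does depends on the target and on the type being updated):

#include <stdio.h>

int main (void)
{
  long double sum = 0.0L;   /* a wide type makes the lock fallback more likely */

  #pragma omp parallel for
  for (int i = 0; i < 1000; i++)
    {
      /* May compile to GOMP_atomic_start ()/GOMP_atomic_end () around the
         update when no native atomic instruction is available.  */
      #pragma omp atomic
      sum += 1.0L;
    }

  printf ("%Lf\n", sum);
  return 0;
}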
Example #3
void
GOMP_critical_end (void)
{
  gomp_mutex_unlock (&default_lock);
}
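
GOMP_critical_end is the closing half of an unnamed "#pragma omp critical" region; GOMP_critical_start and GOMP_critical_end lock and unlock the shared default_lock shown above. A small sketch of the construct that generates the pair:

#include <stdio.h>

int main (void)
{
  int counter = 0;

  #pragma omp parallel
  {
    /* The unnamed critical construct serializes on default_lock via
       GOMP_critical_start () ... GOMP_critical_end ().  */
    #pragma omp critical
    counter++;
  }

  printf ("%d\n", counter);
  return 0;
}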
Example #4
unsigned
gomp_resolve_num_threads (unsigned specified, unsigned count)
{
  struct gomp_thread *thread = gomp_thread();
  struct gomp_task_icv *icv;
  unsigned threads_requested, max_num_threads, num_threads;
  unsigned long remaining;

  icv = gomp_icv (false);

  if (specified == 1)
    return 1;
  else if (thread->ts.active_level >= 1 && !icv->nest_var)
    return 1;
  else if (thread->ts.active_level >= gomp_max_active_levels_var)
    return 1;

  /* If NUM_THREADS not specified, use nthreads_var.  */
  if (specified == 0)
    threads_requested = icv->nthreads_var;
  else
    threads_requested = specified;

  max_num_threads = threads_requested;

  /* If dynamic threads are enabled, bound the number of threads
     that we launch.  */
  if (icv->dyn_var)
    {
      unsigned dyn = gomp_dynamic_max_threads ();
      if (dyn < max_num_threads)
	max_num_threads = dyn;

      /* Optimization for parallel sections.  */
      if (count && count < max_num_threads)
	max_num_threads = count;
    }

  /* ULONG_MAX stands for infinity.  */
  if (__builtin_expect (gomp_thread_limit_var == ULONG_MAX, 1)
      || max_num_threads == 1)
    return max_num_threads;

#ifdef HAVE_SYNC_BUILTINS
  do
    {
      remaining = gomp_remaining_threads_count;
      num_threads = max_num_threads;
      if (num_threads > remaining)
	num_threads = remaining + 1;
    }
  while (__sync_val_compare_and_swap (&gomp_remaining_threads_count,
				      remaining, remaining - num_threads + 1)
	 != remaining);
#else
  gomp_mutex_lock (&gomp_remaining_threads_lock);
  num_threads = max_num_threads;
  remaining = gomp_remaining_threads_count;
  if (num_threads > remaining)
    num_threads = remaining + 1;
  gomp_remaining_threads_count -= num_threads - 1;
  gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif

  return num_threads;
}
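
The HAVE_SYNC_BUILTINS branch above reserves threads from a global budget with a compare-and-swap. A simplified, single-threaded sketch of just that clamping arithmetic (the names and the initial budget are illustrative, not libgomp's):

#include <stdio.h>

/* Illustrative stand-in for gomp_remaining_threads_count.  */
static unsigned long remaining_threads = 7;

/* Grant at most (remaining + 1) threads -- the current thread is not part
   of the remaining budget -- and charge the budget for the extras.  */
static unsigned
clamp_team_size (unsigned max_num_threads)
{
  unsigned num_threads = max_num_threads;
  if (num_threads > remaining_threads)
    num_threads = remaining_threads + 1;
  remaining_threads -= num_threads - 1;
  return num_threads;
}

int main (void)
{
  printf ("%u\n", clamp_team_size (16));   /* prints 8 with the budget above */
  return 0;
}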
Example #5
void
gomp_unset_lock_30 (omp_lock_t *lock)
{
  gomp_mutex_unlock (lock);
}
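
gomp_unset_lock_30 is the lock-release primitive behind the public omp_unset_lock entry point (the _30 suffix reflects libgomp's OpenMP 3.0 symbol version); it simply unlocks the underlying gomp mutex. Through the public API it is exercised like this:

#include <stdio.h>
#include <omp.h>

int main (void)
{
  omp_lock_t lock;
  int hits = 0;

  omp_init_lock (&lock);

  #pragma omp parallel
  {
    omp_set_lock (&lock);     /* acquire */
    hits++;
    omp_unset_lock (&lock);   /* release; backed by gomp_unset_lock_30 */
  }

  omp_destroy_lock (&lock);
  printf ("%d\n", hits);
  return 0;
}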
Example #6
static struct gomp_device_descr *
resolve_device (acc_device_t d, bool fail_is_error)
{
  acc_device_t d_arg = d;

  switch (d)
    {
    case acc_device_default:
      {
	if (goacc_device_type)
	  {
	    /* Lookup the named device.  */
	    while (++d != _ACC_device_hwm)
	      if (dispatchers[d]
		  && !strcasecmp (goacc_device_type,
				  get_openacc_name (dispatchers[d]->name))
		  && dispatchers[d]->get_num_devices_func () > 0)
		goto found;

	    if (fail_is_error)
	      {
		gomp_mutex_unlock (&acc_device_lock);
		gomp_fatal ("device type %s not supported", goacc_device_type);
	      }
	    else
	      return NULL;
	  }

	/* No default device specified, so start scanning for any non-host
	   device that is available.  */
	d = acc_device_not_host;
      }
      /* FALLTHROUGH */

    case acc_device_not_host:
      /* Find the first available device after acc_device_not_host.  */
      while (++d != _ACC_device_hwm)
	if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
	  goto found;
      if (d_arg == acc_device_default)
	{
	  d = acc_device_host;
	  goto found;
	}
      if (fail_is_error)
        {
	  gomp_mutex_unlock (&acc_device_lock);
	  gomp_fatal ("no device found");
	}
      else
        return NULL;
      break;

    case acc_device_host:
      break;

    default:
      if (d > _ACC_device_hwm)
	{
	  if (fail_is_error)
	    goto unsupported_device;
	  else
	    return NULL;
	}
      break;
    }
 found:

  assert (d != acc_device_none
	  && d != acc_device_default
	  && d != acc_device_not_host);

  if (dispatchers[d] == NULL && fail_is_error)
    {
    unsupported_device:
      gomp_mutex_unlock (&acc_device_lock);
      gomp_fatal ("device type %s not supported", name_of_acc_device_t (d));
    }

  return dispatchers[d];
}
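
resolve_device turns an abstract acc_device_t into a concrete dispatcher: acc_device_default first honours the device name stored in goacc_device_type (set from the ACC_DEVICE_TYPE environment variable), then scans for any available non-host device, and finally falls back to the host. A hedged sketch of a public call that exercises this selection:

#include <stdio.h>
#include <openacc.h>

int main (void)
{
  /* acc_init (acc_device_default) goes through resolve_device: honour
     ACC_DEVICE_TYPE if set, else pick the first available non-host device,
     else fall back to the host.  */
  acc_init (acc_device_default);
  printf ("resolved device type: %d\n", (int) acc_get_device_type ());
  return 0;
}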
Example #7
unsigned
gomp_resolve_num_threads (unsigned specified, unsigned count)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  unsigned threads_requested, max_num_threads, num_threads;
  unsigned long busy;
  struct gomp_thread_pool *pool;

  icv = gomp_icv (false);

  if (specified == 1)
    return 1;
  else if (thr->ts.active_level >= 1 && !icv->nest_var)
    return 1;
  else if (thr->ts.active_level >= gomp_max_active_levels_var)
    return 1;

  /* If NUM_THREADS not specified, use nthreads_var.  */
  if (specified == 0)
    threads_requested = icv->nthreads_var;
  else
    threads_requested = specified;

  max_num_threads = threads_requested;

  /* If dynamic threads are enabled, bound the number of threads
     that we launch.  */
  if (icv->dyn_var)
    {
      unsigned dyn = gomp_dynamic_max_threads ();
      if (dyn < max_num_threads)
	max_num_threads = dyn;

      /* Optimization for parallel sections.  */
      if (count && count < max_num_threads)
	max_num_threads = count;
    }

  /* UINT_MAX stands for infinity.  */
  if (__builtin_expect (icv->thread_limit_var == UINT_MAX, 1)
      || max_num_threads == 1)
    return max_num_threads;

  /* The threads_busy counter lives in thread_pool, if there
     isn't a thread_pool yet, there must be just one thread
     in the contention group.  If thr->team is NULL, this isn't
     nested parallel, so there is just one thread in the
     contention group as well, no need to handle it atomically.  */
  pool = thr->thread_pool;
  if (thr->ts.team == NULL)
    {
      num_threads = max_num_threads;
      if (num_threads > icv->thread_limit_var)
	num_threads = icv->thread_limit_var;
      if (pool)
	pool->threads_busy = num_threads;
      return num_threads;
    }

#ifdef HAVE_SYNC_BUILTINS
  do
    {
      busy = pool->threads_busy;
      num_threads = max_num_threads;
      if (icv->thread_limit_var - busy + 1 < num_threads)
	num_threads = icv->thread_limit_var - busy + 1;
    }
  while (__sync_val_compare_and_swap (&pool->threads_busy,
				      busy, busy + num_threads - 1)
	 != busy);
#else
  gomp_mutex_lock (&gomp_managed_threads_lock);
  num_threads = max_num_threads;
  busy = pool->threads_busy;
  if (icv->thread_limit_var - busy + 1 < num_threads)
    num_threads = icv->thread_limit_var - busy + 1;
  pool->threads_busy += num_threads - 1;
  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif

  return num_threads;
}
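
This newer variant clamps against the per-contention-group thread-limit-var and the pool's threads_busy counter instead of a single global budget. The ICVs it consults are the ones user code sets through the usual OpenMP knobs, for example:

#include <stdio.h>
#include <omp.h>

int main (void)
{
  omp_set_dynamic (1);        /* dyn-var: allow the runtime to shrink the team */
  omp_set_num_threads (16);   /* nthreads-var: the request that gets clamped */

  #pragma omp parallel
  {
    #pragma omp single
    printf ("resolved team size: %d (thread limit %d)\n",
            omp_get_num_threads (), omp_get_thread_limit ());
  }
  return 0;
}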
Example #8
File: task.c Project: jtramm/gcc
void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  /* FIXME, use priority.  */
  (void) priority;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
	  && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
			|| (flags & GOMP_TASK_FLAG_FINAL);
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (task.children != NULL)
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (task.children);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  gomp_task_handle_depend (task, parent, depend);
	  if (task->num_dependees)
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}
      if (parent->children)
	{
	  task->next_child = parent->children;
	  task->prev_child = parent->children->prev_child;
	  task->next_child->prev_child = task;
	  task->prev_child->next_child = task;
	}
      else
	{
	  task->next_child = task;
	  task->prev_child = task;
	}
      parent->children = task;
      if (taskgroup)
	{
	  /* If applicable, place task into its taskgroup.  */
	  if (taskgroup->children)
	    {
	      task->next_taskgroup = taskgroup->children;
	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
	      task->next_taskgroup->prev_taskgroup = task;
	      task->prev_taskgroup->next_taskgroup = task;
	    }
	  else
	    {
	      task->next_taskgroup = task;
	      task->prev_taskgroup = task;
	    }
	  taskgroup->children = task;
	}
      if (team->task_queue)
	{
	  task->next_queue = team->task_queue;
	  task->prev_queue = team->task_queue->prev_queue;
	  task->next_queue->prev_queue = task;
	  task->prev_queue->next_queue = task;
	}
      else
	{
	  task->next_queue = task;
	  task->prev_queue = task;
	  team->task_queue = task;
	}
      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}
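
GOMP_task is what the compiler emits for "#pragma omp task": fn is the outlined task body, data/arg_size/arg_align describe the captured argument block, cpyfn is a copy helper (used, for example, when C++ copy constructors are involved), and flags/depend/priority carry the clauses. A hedged source-level sketch that lowers to such a call:

#include <stdio.h>

int main (void)
{
  int result = 0;

  #pragma omp parallel
  #pragma omp single
  {
    int x = 42;

    /* The task body is outlined into a function passed as `fn'; the
       firstprivate capture of `x' becomes the argument block, and the
       depend/priority clauses map onto the trailing parameters.  */
    #pragma omp task firstprivate (x) depend (out: result) priority (1)
    result = x;

    #pragma omp taskwait
    printf ("%d\n", result);
  }
  return 0;
}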
Example #9
File: task.c Project: jtramm/gcc
void
gomp_create_target_task (struct gomp_device_descr *devicep,
			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
			 size_t *sizes, unsigned short *kinds,
			 unsigned int flags, void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;

  if (depend != NULL)
    depend_size = ((uintptr_t) depend[0]
		   * sizeof (struct gomp_task_depend_entry));
  task = gomp_malloc (sizeof (*task) + depend_size
		      + sizeof (*ttask)
		      + mapnum * (sizeof (void *) + sizeof (size_t)
				  + sizeof (unsigned short)));
  gomp_init_task (task, parent, gomp_icv (false));
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  ttask = (struct gomp_target_task *) &task->depend[(uintptr_t) depend[0]];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  ttask->flags = flags;
  task->fn = gomp_target_task_fn;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
			|| (taskgroup && taskgroup->cancelled), 0))
    {
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return;
    }
  if (taskgroup)
    taskgroup->num_children++;
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
	{
	  gomp_mutex_unlock (&team->task_lock);
	  return;
	}
    }
  if (parent->children)
    {
      task->next_child = parent->children;
      task->prev_child = parent->children->prev_child;
      task->next_child->prev_child = task;
      task->prev_child->next_child = task;
    }
  else
    {
      task->next_child = task;
      task->prev_child = task;
    }
  parent->children = task;
  if (taskgroup)
    {
      /* If applicable, place task into its taskgroup.  */
      if (taskgroup->children)
	{
	  task->next_taskgroup = taskgroup->children;
	  task->prev_taskgroup = taskgroup->children->prev_taskgroup;
	  task->next_taskgroup->prev_taskgroup = task;
	  task->prev_taskgroup->next_taskgroup = task;
	}
      else
	{
	  task->next_taskgroup = task;
	  task->prev_taskgroup = task;
	}
      taskgroup->children = task;
    }
  if (team->task_queue)
    {
      task->next_queue = team->task_queue;
      task->prev_queue = team->task_queue->prev_queue;
      task->next_queue->prev_queue = task;
      task->prev_queue->next_queue = task;
    }
  else
    {
      task->next_queue = task;
      task->prev_queue = task;
      team->task_queue = task;
    }
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
	    < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
}
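
gomp_create_target_task packages a deferred target region into a task: the mapping arrays (hostaddrs, sizes, kinds) are copied into a gomp_target_task whose fn is gomp_target_task_fn. A hedged sketch of a construct that can be deferred this way (whether it really is deferred depends on the enclosing context and the libgomp version):

#include <stdio.h>

int main (void)
{
  int x = 0;

  #pragma omp parallel
  #pragma omp single
  {
    /* A nowait target region with depend clauses becomes a deferred
       "target task"; its map arrays are copied into the task structure.  */
    #pragma omp target map (tofrom: x) depend (out: x) nowait
    x = 42;

    #pragma omp taskwait
    printf ("%d\n", x);
  }
  return 0;
}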
Example #10
File: task.c Project: jtramm/gcc
void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  struct gomp_task *last_parent_depends_on = NULL;
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + 2];
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	if (i >= nout && ent->is_in)
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		/* If a task we need to wait for is not already
		   running and is ready to be scheduled, move it to
		   front, so that we run it as soon as possible.

		   We rearrange the children queue such that all
		   parent_depends_on tasks are first, and
		   last_parent_depends_on points to the last such task
		   we rearranged.  For example, given the following
		   children where PD[123] are the parent_depends_on
		   tasks:

			task->children
			|
			V
			C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4

		   We rearrange such that:

			task->children
			|	       +--- last_parent_depends_on
			|	       |
			V	       V
			PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4
		*/

		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  {
		    if (last_parent_depends_on)
		      {
			tsk->prev_child->next_child = tsk->next_child;
			tsk->next_child->prev_child = tsk->prev_child;
			tsk->prev_child = last_parent_depends_on;
			tsk->next_child = last_parent_depends_on->next_child;
			tsk->prev_child->next_child = tsk;
			tsk->next_child->prev_child = tsk;
		      }
		    else if (tsk != task->children)
		      {
			tsk->prev_child->next_child = tsk->next_child;
			tsk->next_child->prev_child = tsk->prev_child;
			tsk->prev_child = task->children->prev_child;
			tsk->next_child = task->children;
			task->children = tsk;
			tsk->prev_child->next_child = tsk;
			tsk->next_child->prev_child = tsk;
		      }
		    last_parent_depends_on = tsk;
		  }
	      }
	  }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  taskwait.last_parent_depends_on = last_parent_depends_on;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      if (task->children->kind == GOMP_TASK_WAITING)
	{
	  child_task = task->children;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are already running
	   in other threads.  Wait for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;

	  /* Remove child_task from sibling list.  */
	  child_task->prev_child->next_child = child_task->next_child;
	  child_task->next_child->prev_child = child_task->prev_child;
	  if (task->children == child_task)
	    {
	      if (child_task->next_child != child_task)
		task->children = child_task->next_child;
	      else
		task->children = NULL;
	    }

	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
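
This helper is reached from GOMP_task when an undeferred task (for example one with if(0)) still carries depend clauses: before running the body inline, the parent must wait for the earlier sibling tasks it depends on, rotating them to the front of the children queue as the comment above describes. A hedged example of source that takes this path:

#include <stdio.h>

int x;

int main (void)
{
  #pragma omp parallel
  #pragma omp single
  {
    #pragma omp task depend (out: x)   /* deferred producer */
    x = 1;

    /* if(0) forces undeferred (inline) execution, but depend(in: x) still
       has to wait for the producer; that wait happens in
       gomp_task_maybe_wait_for_dependencies.  */
    #pragma omp task if (0) depend (in: x)
    printf ("%d\n", x);
  }
  return 0;
}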
Example #11
File: task.c Project: jtramm/gcc
void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (taskgroup->children == NULL)
	{
	  if (taskgroup->num_children)
	    {
	      if (task->children == NULL)
		goto do_wait;
	      child_task = task->children;
            }
          else
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task = taskgroup->children;
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	 do_wait:
	  /* All tasks we are waiting for are already running
	     in other threads.  Wait for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}
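
GOMP_taskgroup_end pairs with GOMP_taskgroup_start: it runs or waits for every task generated inside the group, including descendants, before the construct can be left. A small sketch of the construct that produces the call pair:

#include <stdio.h>

int main (void)
{
  int a = 0, b = 0;

  #pragma omp parallel
  #pragma omp single
  {
    /* Compiles to GOMP_taskgroup_start () ... GOMP_taskgroup_end (); both
       tasks are guaranteed to have finished at the closing brace.  */
    #pragma omp taskgroup
    {
      #pragma omp task
      a = 1;
      #pragma omp task
      b = 2;
    }
    printf ("%d %d\n", a, b);
  }
  return 0;
}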
Example #12
File: task.c Project: jtramm/gcc
void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL)
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (task->children == NULL)
	{
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      if (task->children->kind == GOMP_TASK_WAITING)
	{
	  child_task = task->children;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are already running
	     in other threads.  Wait for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);

	  /* Remove child_task from children list, and set up the next
	     sibling to be run.  */
	  child_task->prev_child->next_child = child_task->next_child;
	  child_task->next_child->prev_child = child_task->prev_child;
	  if (task->children == child_task)
	    {
	      if (child_task->next_child != child_task)
		task->children = child_task->next_child;
	      else
		task->children = NULL;
	    }
	  /* Orphan all the children of CHILD_TASK.  */
	  gomp_clear_parent (child_task->children);

	  /* Remove CHILD_TASK from its taskgroup.  */
	  gomp_task_run_post_remove_taskgroup (child_task);

	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
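
In contrast to GOMP_taskgroup_end, GOMP_taskwait only waits for the direct children of the current task; grandchildren are re-parented via gomp_clear_parent rather than waited for. A sketch:

#include <stdio.h>

int main (void)
{
  #pragma omp parallel
  #pragma omp single
  {
    #pragma omp task                   /* direct child: taskwait waits for it */
    {
      #pragma omp task                 /* grandchild: taskwait does not wait for it */
      printf ("grandchild\n");
      printf ("child\n");
    }

    #pragma omp taskwait               /* lowers to GOMP_taskwait () */
    printf ("after taskwait\n");
  }
  return 0;
}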
Example #13
File: task.c Project: jtramm/gcc
void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (team->task_queue != NULL)
	{
	  child_task = team->task_queue;
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  if (!cancelled)
	    team->task_running_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}
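
gomp_barrier_handle_tasks is the scheduler that threads drop into when they reach a team barrier while tasks are still queued: it drains team->task_queue and only signals the barrier done once task_count reaches zero. A hedged sketch of code whose deferred tasks are typically executed this way, at the implicit barrier ending the parallel region:

#include <stdio.h>
#include <omp.h>

int main (void)
{
  #pragma omp parallel
  {
    #pragma omp single nowait
    for (int i = 0; i < 8; i++)
      {
        #pragma omp task firstprivate (i)
        printf ("task %d run by thread %d\n", i, omp_get_thread_num ());
      }
    /* Implicit barrier here: idle threads enter the task scheduler and run
       whatever is still queued before the region can end.  */
  }
  return 0;
}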
Example #14
void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  if (flags & 1)
    flags &= ~1;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & 8) && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_IFFALSE;
      task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (task.children != NULL)
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (task.children);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & 8)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->kind = GOMP_TASK_IFFALSE;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & 2) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  size_t ndepend = (uintptr_t) depend[0];
	  size_t nout = (uintptr_t) depend[1];
	  size_t i;
	  hash_entry_type ent;

	  task->depend_count = ndepend;
	  task->num_dependees = 0;
	  if (parent->depend_hash == NULL)
	    parent->depend_hash
	      = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
	  for (i = 0; i < ndepend; i++)
	    {
	      task->depend[i].addr = depend[2 + i];
	      task->depend[i].next = NULL;
	      task->depend[i].prev = NULL;
	      task->depend[i].task = task;
	      task->depend[i].is_in = i >= nout;
	      task->depend[i].redundant = false;
	      task->depend[i].redundant_out = false;

	      hash_entry_type *slot
		= htab_find_slot (&parent->depend_hash, &task->depend[i],
				  INSERT);
	      hash_entry_type out = NULL, last = NULL;
	      if (*slot)
		{
		  /* If multiple depends on the same task are the
		     same, all but the first one are redundant.
		     As inout/out come first, if any of them is
		     inout/out, it will win, which is the right
		     semantics.  */
		  if ((*slot)->task == task)
		    {
		      task->depend[i].redundant = true;
		      continue;
		    }
		  for (ent = *slot; ent; ent = ent->next)
		    {
		      if (ent->redundant_out)
			break;

		      last = ent;

		      /* depend(in:...) doesn't depend on earlier
			 depend(in:...).  */
		      if (i >= nout && ent->is_in)
			continue;

		      if (!ent->is_in)
			out = ent;

		      struct gomp_task *tsk = ent->task;
		      if (tsk->dependers == NULL)
			{
			  tsk->dependers
			    = gomp_malloc (sizeof (struct gomp_dependers_vec)
					   + 6 * sizeof (struct gomp_task *));
			  tsk->dependers->n_elem = 1;
			  tsk->dependers->allocated = 6;
			  tsk->dependers->elem[0] = task;
			  task->num_dependees++;
			  continue;
			}
		      /* We already have some other dependency on tsk
			 from earlier depend clause.  */
		      else if (tsk->dependers->n_elem
			       && (tsk->dependers->elem[tsk->dependers->n_elem
							- 1]
				   == task))
			continue;
		      else if (tsk->dependers->n_elem
			       == tsk->dependers->allocated)
			{
			  tsk->dependers->allocated
			    = tsk->dependers->allocated * 2 + 2;
			  tsk->dependers
			    = gomp_realloc (tsk->dependers,
					    sizeof (struct gomp_dependers_vec)
					    + (tsk->dependers->allocated
					       * sizeof (struct gomp_task *)));
			}
		      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
		      task->num_dependees++;
		    }
		  task->depend[i].next = *slot;
		  (*slot)->prev = &task->depend[i];
		}
	      *slot = &task->depend[i];

	      /* There is no need to store more than one depend({,in}out:)
		 task per address in the hash table chain for the purpose
		 of creation of deferred tasks, because each out
		 depends on all earlier outs, thus it is enough to record
		 just the last depend({,in}out:).  For depend(in:), we need
		 to keep all of the previous ones not terminated yet, because
		 a later depend({,in}out:) might need to depend on all of
		 them.  So, if the new task's clause is depend({,in}out:),
		 we know there is at most one other depend({,in}out:) clause
		 in the list (out).  For non-deferred tasks we want to see
		 all outs, so they are moved to the end of the chain,
		 after first redundant_out entry all following entries
		 should be redundant_out.  */
	      if (!task->depend[i].is_in && out)
		{
		  if (out != last)
		    {
		      out->next->prev = out->prev;
		      out->prev->next = out->next;
		      out->next = last->next;
		      out->prev = last;
		      last->next = out;
		      if (out->next)
			out->next->prev = out;
		    }
		  out->redundant_out = true;
		}
	    }
	  if (task->num_dependees)
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}
      if (parent->children)
	{
	  task->next_child = parent->children;
	  task->prev_child = parent->children->prev_child;
	  task->next_child->prev_child = task;
	  task->prev_child->next_child = task;
	}
      else
	{
	  task->next_child = task;
	  task->prev_child = task;
	}
      parent->children = task;
      if (taskgroup)
	{
	  if (taskgroup->children)
	    {
	      task->next_taskgroup = taskgroup->children;
	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
	      task->next_taskgroup->prev_taskgroup = task;
	      task->prev_taskgroup->next_taskgroup = task;
	    }
	  else
	    {
	      task->next_taskgroup = task;
	      task->prev_taskgroup = task;
	    }
	  taskgroup->children = task;
	}
      if (team->task_queue)
	{
	  task->next_queue = team->task_queue;
	  task->prev_queue = team->task_queue->prev_queue;
	  task->next_queue->prev_queue = task;
	  task->prev_queue->next_queue = task;
	}
      else
	{
	  task->next_queue = task;
	  task->prev_queue = task;
	  team->task_queue = task;
	}
      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}
Example #15
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
         freeing the device memory. */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);
}
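
acc_unmap_data undoes a previous acc_map_data: it removes the host/device association without copying data back and without freeing the device buffer (the refcount == 2 handling above keeps gomp_unmap_vars from releasing memory the user allocated with acc_malloc). A hedged usage sketch:

#include <stdlib.h>
#include <openacc.h>

int main (void)
{
  size_t bytes = 1024 * sizeof (float);
  float *host = malloc (bytes);
  void *dev = acc_malloc (bytes);

  acc_map_data (host, dev, bytes);   /* associate the two buffers */
  /* ... compute regions may now treat `host' as present ... */
  acc_unmap_data (host);             /* drop the mapping; device memory stays allocated */

  acc_free (dev);
  free (host);
  return 0;
}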
Example #16
void
GOACC_parallel_keyed (int device, void (*fn) (void *),
		      size_t mapnum, void **hostaddrs, size_t *sizes,
		      unsigned short *kinds, ...)
{
  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
  va_list ap;
  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;
  struct target_mem_desc *tgt;
  void **devaddrs;
  unsigned int i;
  struct splay_tree_key_s k;
  splay_tree_key tgt_fn_key;
  void (*tgt_fn);
  int async = GOMP_ASYNC_SYNC;
  unsigned dims[GOMP_DIM_MAX];
  unsigned tag;

#ifdef HAVE_INTTYPES_H
  gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
#else
  gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
#endif
  goacc_lazy_initialize ();

  thr = goacc_thread ();
  acc_dev = thr->dev;

  /* Host fallback if "if" clause is false or if the current device is set to
     the host.  */
  if (host_fallback)
    {
      goacc_save_and_set_bind (acc_device_host);
      fn (hostaddrs);
      goacc_restore_bind ();
      return;
    }
  else if (acc_device_type (acc_dev->type) == acc_device_host)
    {
      fn (hostaddrs);
      return;
    }

  va_start (ap, kinds);
  /* TODO: This will need amending when device_type is implemented.  */
  while ((tag = va_arg (ap, unsigned)) != 0)
    {
      if (GOMP_LAUNCH_DEVICE (tag))
	gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
		    GOMP_LAUNCH_DEVICE (tag));

      switch (GOMP_LAUNCH_CODE (tag))
	{
	case GOMP_LAUNCH_DIM:
	  {
	    unsigned mask = GOMP_LAUNCH_OP (tag);

	    for (i = 0; i != GOMP_DIM_MAX; i++)
	      if (mask & GOMP_DIM_MASK (i))
		dims[i] = va_arg (ap, unsigned);
	  }
	  break;

	case GOMP_LAUNCH_ASYNC:
	  {
	    /* Small constant values are encoded in the operand.  */
	    async = GOMP_LAUNCH_OP (tag);

	    if (async == GOMP_LAUNCH_OP_MAX)
	      async = va_arg (ap, unsigned);
	    break;
	  }

	case GOMP_LAUNCH_WAIT:
	  {
	    unsigned num_waits = GOMP_LAUNCH_OP (tag);

	    if (num_waits)
	      goacc_wait (async, num_waits, &ap);
	    break;
	  }

	default:
	  gomp_fatal ("unrecognized offload code '%d',"
		      " libgomp is too old", GOMP_LAUNCH_CODE (tag));
	}
    }
  va_end (ap);
  
  acc_dev->openacc.async_set_async_func (async);

  if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
    {
      k.host_start = (uintptr_t) fn;
      k.host_end = k.host_start + 1;
      gomp_mutex_lock (&acc_dev->lock);
      tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
      gomp_mutex_unlock (&acc_dev->lock);

      if (tgt_fn_key == NULL)
	gomp_fatal ("target function wasn't mapped");

      tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
    }
  else
    tgt_fn = (void (*)) fn;

  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
		       GOMP_MAP_VARS_OPENACC);

  devaddrs = gomp_alloca (sizeof (void *) * mapnum);
  for (i = 0; i < mapnum; i++)
    devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
			    + tgt->list[i].key->tgt_offset);

  acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
			      async, dims, tgt);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (tgt, true);
  else
    {
      gomp_copy_from_async (tgt);
      acc_dev->openacc.register_async_cleanup_func (tgt);
    }

  acc_dev->openacc.async_set_async_func (acc_async_sync);
}
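
GOACC_parallel_keyed is the compiler's entry point for an OpenACC parallel construct; the trailing varargs are a stream of GOMP_LAUNCH_* tags carrying the gang/worker/vector dimensions, the async queue, and any wait list, decoded in the loop above. A hedged user-level sketch of a construct that produces such a call:

#include <openacc.h>

int main (void)
{
  float a[1000];

  /* num_gangs/vector_length are encoded in a GOMP_LAUNCH_DIM tag and
     async(1) in a GOMP_LAUNCH_ASYNC tag among the varargs of
     GOACC_parallel_keyed.  */
  #pragma acc parallel loop copyout (a) num_gangs (32) vector_length (128) async (1)
  for (int i = 0; i < 1000; i++)
    a[i] = i;

  #pragma acc wait (1)
  return 0;
}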
Example #17
static void *
present_create_copy (unsigned f, void *h, size_t s)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present. */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      if (!(f & FLAG_PRESENT))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
        	      (void *)h, (int)s, (void *)d, (int)s);
	}
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
			   GOMP_MAP_VARS_OPENACC);

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  return d;
}
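
present_create_copy is the shared worker behind the acc_create / acc_copyin / acc_present_or_* entry points; the f bitmask of FLAG_PRESENT, FLAG_CREATE and FLAG_COPY selects between "must already be mapped", "allocate" and "copy to the device". A hedged user-level sketch of two of those wrappers (the wrapper-to-flag mapping in the comments is inferred from the flag names, not quoted from the source):

#include <stdlib.h>
#include <openacc.h>

int main (void)
{
  size_t bytes = 256 * sizeof (float);
  float *a = malloc (bytes);

  /* Allocate device memory only (plausibly FLAG_CREATE).  */
  acc_create (a, bytes);
  acc_delete (a, bytes);

  /* Allocate and copy to the device (plausibly FLAG_CREATE | FLAG_COPY).  */
  acc_copyin (a, bytes);
  acc_delete (a, bytes);

  free (a);
  return 0;
}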
Example #18
void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory. */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	{
	  if (n->tgt == t)
	    {
	      if (tp)
		tp->prev = t->prev;
	      else
		acc_dev->openacc.data_environ = t->prev;
	      break;
	    }
	}
    }

  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    {
      gomp_copy_from_async (t);
      acc_dev->openacc.register_async_cleanup_func (t);
    }

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}
Example #19
static void
acc_shutdown_1 (acc_device_t d)
{
  struct gomp_device_descr *base_dev;
  struct goacc_thread *walk;
  int ndevs, i;
  bool devices_active = false;

  /* Get the base device for this device type.  */
  base_dev = resolve_device (d, true);

  ndevs = base_dev->get_num_devices_func ();

  /* Unload all the devices of this type that have been opened.  */
  for (i = 0; i < ndevs; i++)
    {
      struct gomp_device_descr *acc_dev = &base_dev[i];

      gomp_mutex_lock (&acc_dev->lock);
      gomp_unload_device (acc_dev);
      gomp_mutex_unlock (&acc_dev->lock);
    }
  
  gomp_mutex_lock (&goacc_thread_lock);

  /* Free target-specific TLS data and close all devices.  */
  for (walk = goacc_threads; walk != NULL; walk = walk->next)
    {
      if (walk->target_tls)
	base_dev->openacc.destroy_thread_data_func (walk->target_tls);

      walk->target_tls = NULL;

      /* This would mean the user is shutting down OpenACC in the middle of an
         "acc data" pragma.  Likely not intentional.  */
      if (walk->mapped_data)
	{
	  gomp_mutex_unlock (&goacc_thread_lock);
	  gomp_fatal ("shutdown in 'acc data' region");
	}

      /* Similarly, if this happens then user code has done something weird.  */
      if (walk->saved_bound_dev)
	{
	  gomp_mutex_unlock (&goacc_thread_lock);
	  gomp_fatal ("shutdown during host fallback");
	}

      if (walk->dev)
	{
	  gomp_mutex_lock (&walk->dev->lock);
	  gomp_free_memmap (&walk->dev->mem_map);
	  gomp_mutex_unlock (&walk->dev->lock);

	  walk->dev = NULL;
	  walk->base_dev = NULL;
	}
    }

  gomp_mutex_unlock (&goacc_thread_lock);


  /* Close all the devices of this type that have been opened.  */
  for (i = 0; i < ndevs; i++)
    {
      struct gomp_device_descr *acc_dev = &base_dev[i];
      gomp_mutex_lock (&acc_dev->lock);
      if (acc_dev->is_initialized)
        {
	  devices_active = true;
	  gomp_fini_device (acc_dev);
	}
      gomp_mutex_unlock (&acc_dev->lock);
    }

  if (!devices_active)
    gomp_fatal ("no device initialized");
}
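
acc_shutdown_1 backs the public acc_shutdown: it unloads and finalizes every opened device of the requested type, refusing to proceed if a thread is still inside an "acc data" region or a host-fallback section. A hedged usage sketch:

#include <openacc.h>

int main (void)
{
  if (acc_get_num_devices (acc_device_not_host) > 0)
    {
      acc_init (acc_device_not_host);
      /* ... offloaded work ... */
      acc_shutdown (acc_device_not_host);   /* ends up in acc_shutdown_1 */
    }
  return 0;
}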
Example #20
void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	       long arg_size, long arg_align, unsigned flags,
	       unsigned long num_tasks, int priority,
	       TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    return;

#ifdef TYPE_is_long
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
	return;
      s--;
    }
  else
    {
      if (start <= end)
	return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
	return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
	return;
      n = (start - end - step - 1) / -step;
    }
#endif

  TYPE task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
	num_tasks = ~0UL;
#endif
      if (num_tasks <= 1)
	{
	  num_tasks = 1;
	  task_step = end - start;
	}
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
	       && num_tasks != ~0UL
#endif
	      )
	{
	  UTYPE mul = num_tasks * grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mul != n)
	    {
	      task_step += step;
	      nfirst = n - mul - 1;
	    }
	}
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }
  else
    {
      if (num_tasks == 0)
	num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
	num_tasks = n;
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
	return;
    }
  else
    ialias_call (GOMP_taskgroup_start) ();

  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  struct gomp_task task[num_tasks];
	  struct gomp_task *parent = thr->task;
	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
	  char buf[num_tasks * arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  char *orig_arg = arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_init_task (&task[i], parent, gomp_icv (false));
	      task[i].priority = priority;
	      task[i].kind = GOMP_TASK_UNDEFERRED;
	      task[i].final_task = (thr->task && thr->task->final_task)
				   || (flags & GOMP_TASK_FLAG_FINAL);
	      if (thr->task)
		{
		  task[i].in_tied_task = thr->task->in_tied_task;
		  task[i].taskgroup = thr->task->taskgroup;
		}
	      thr->task = &task[i];
	      cpyfn (arg, data);
	      arg += arg_size;
	    }
	  arg = orig_arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      thr->task = &task[i];
	      ((TYPE *)arg)[0] = start;
	      start += task_step;
	      ((TYPE *)arg)[1] = start;
	      if (i == nfirst)
		task_step -= step;
	      fn (arg);
	      arg += arg_size;
	      if (!priority_queue_empty_p (&task[i].children_queue,
					   MEMMODEL_RELAXED))
		{
		  gomp_mutex_lock (&team->task_lock);
		  gomp_clear_parent (&task[i].children_queue);
		  gomp_mutex_unlock (&team->task_lock);
		}
	      gomp_end_task ();
	    }
	}
      else
	for (i = 0; i < num_tasks; i++)
	  {
	    struct gomp_task task;

	    gomp_init_task (&task, thr->task, gomp_icv (false));
	    task.priority = priority;
	    task.kind = GOMP_TASK_UNDEFERRED;
	    task.final_task = (thr->task && thr->task->final_task)
			      || (flags & GOMP_TASK_FLAG_FINAL);
	    if (thr->task)
	      {
		task.in_tied_task = thr->task->in_tied_task;
		task.taskgroup = thr->task->taskgroup;
	      }
	    thr->task = &task;
	    ((TYPE *)data)[0] = start;
	    start += task_step;
	    ((TYPE *)data)[1] = start;
	    if (i == nfirst)
	      task_step -= step;
	    fn (data);
	    if (!priority_queue_empty_p (&task.children_queue,
					 MEMMODEL_RELAXED))
	      {
		gomp_mutex_lock (&team->task_lock);
		gomp_clear_parent (&task.children_queue);
		gomp_mutex_unlock (&team->task_lock);
	      }
	    gomp_end_task ();
	  }
    }
  else
    {
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task
	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
	  tasks[i] = task;
	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
			  & ~(uintptr_t) (arg_align - 1));
	  gomp_init_task (task, parent, gomp_icv (false));
	  task->priority = priority;
	  task->kind = GOMP_TASK_UNDEFERRED;
	  task->in_tied_task = parent->in_tied_task;
	  task->taskgroup = taskgroup;
	  thr->task = task;
	  if (cpyfn)
	    {
	      cpyfn (arg, data);
	      task->copy_ctors_done = true;
	    }
	  else
	    memcpy (arg, data, arg_size);
	  ((TYPE *)arg)[0] = start;
	  start += task_step;
	  ((TYPE *)arg)[1] = start;
	  if (i == nfirst)
	    task_step -= step;
	  thr->task = parent;
	  task->kind = GOMP_TASK_WAITING;
	  task->fn = fn;
	  task->fn_data = arg;
	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
	}
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && cpyfn == NULL, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_finish_task (tasks[i]);
	      free (tasks[i]);
	    }
	  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
	    ialias_call (GOMP_taskgroup_end) ();
	  return;
	}
      if (taskgroup)
	taskgroup->num_children += num_tasks;
      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task = tasks[i];
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, priority,
				 PRIORITY_INSERT_BEGIN,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup)
	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   task, priority, PRIORITY_INSERT_BEGIN,
				   /*last_parent_depends_on=*/false,
				   task->parent_depends_on);
	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
				 PRIORITY_INSERT_END,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  ++team->task_count;
	  ++team->task_queued_count;
	}
      gomp_team_barrier_set_task_pending (&team->barrier);
      if (team->task_running_count + !parent->in_tied_task
	  < team->nthreads)
	{
	  do_wake = team->nthreads - team->task_running_count
		    - !parent->in_tied_task;
	  if ((unsigned long) do_wake > num_tasks)
	    do_wake = num_tasks;
	}
      else
	do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}
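
GOMP_taskloop splits the [start, end) iteration range into chunks (honouring grainsize or num_tasks) and creates one child task per chunk, storing each chunk's bounds in the first two TYPE slots of that task's argument block. A hedged sketch of the construct that lowers to it (or to its unsigned long long sibling, depending on the loop variable type):

#include <stdio.h>

int main (void)
{
  static int a[100000];

  #pragma omp parallel
  #pragma omp single
  {
    /* grainsize(1000) requests chunks of at least 1000 iterations; each
       chunk runs as its own task with the chunk bounds written into its
       argument block.  */
    #pragma omp taskloop grainsize (1000)
    for (int i = 0; i < 100000; i++)
      a[i] = i;
  }

  printf ("%d\n", a[99999]);
  return 0;
}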