Example #1
void
GOMP_critical_name_start (void **pptr)
{
  gomp_mutex_t *plock;

  /* If a mutex fits within the space for a pointer, and is zero initialized,
     then use the pointer space directly.  */
  if (GOMP_MUTEX_INIT_0
      && sizeof (gomp_mutex_t) <= sizeof (void *)
      && __alignof (gomp_mutex_t) <= sizeof (void *))
    plock = (gomp_mutex_t *)pptr;

  /* Otherwise we have to be prepared to malloc storage.  */
  else
    {
      plock = *pptr;

      if (plock == NULL)
	{
#ifdef HAVE_SYNC_BUILTINS
	  gomp_mutex_t *nlock = gomp_malloc (sizeof (gomp_mutex_t));
	  gomp_mutex_init (nlock);

	  plock = __sync_val_compare_and_swap (pptr, NULL, nlock);
	  if (plock != NULL)
	    {
	      gomp_mutex_destroy (nlock);
	      gomp_free (nlock);
	    }
	  else
	    plock = nlock;
#else
	  gomp_mutex_lock (&create_lock_lock);
	  plock = *pptr;
	  if (plock == NULL)
	    {
	      plock = gomp_malloc (sizeof (gomp_mutex_t));
	      gomp_mutex_init (plock);
	      __sync_synchronize ();
	      *pptr = plock;
	    }
	  gomp_mutex_unlock (&create_lock_lock);
#endif
	}
    }

  gomp_mutex_lock (plock);

  /* OMP v3.1, 2.8.6 p81,l16 - "At entry to critical regions" */
  gomp_flush0();
}
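The part worth noting here is the lazy initialization of the named-critical mutex: when the mutex cannot live inside the pointer slot itself, it is allocated on first use and published with a compare-and-swap, and the thread that loses the race destroys and frees its copy. Below is a minimal sketch of the same pattern using plain pthreads and GCC's __atomic builtins rather than the libgomp wrappers; the helper name get_lock is illustrative, not part of libgomp.

/* Sketch only: lazily allocate a mutex and publish it through a shared
   pointer slot; the loser of the CAS race discards its copy.  */
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t *
get_lock (void **pptr)
{
  void *lock = __atomic_load_n (pptr, __ATOMIC_ACQUIRE);
  if (lock == NULL)
    {
      pthread_mutex_t *nlock = malloc (sizeof (*nlock));
      pthread_mutex_init (nlock, NULL);
      void *expected = NULL;
      if (__atomic_compare_exchange_n (pptr, &expected, nlock, false,
                                       __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
        lock = nlock;                     /* We won the race; ours is published.  */
      else
        {
          pthread_mutex_destroy (nlock);  /* Lost the race; discard ours.  */
          free (nlock);
          lock = expected;                /* Use the mutex the winner installed.  */
        }
    }
  return lock;
}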
Example #2
static void
allocate_thread_pool_reservoirs (void)
{
  struct gomp_thread_pool_reservoir **reservoirs;
  size_t size = _Sched_Count () * sizeof (*reservoirs);
  reservoirs = gomp_malloc (size);
  gomp_thread_pool_reservoirs = reservoirs;
  memset (reservoirs, 0, size);
}
Example #3
static struct gomp_thread_pool *gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof(struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}
Example #4
File: team.c Project: 0mp/freebsd
static struct gomp_team *
new_team (unsigned nthreads, struct gomp_work_share *work_share)
{
  struct gomp_team *team;
  size_t size;

  size = sizeof (*team) + nthreads * sizeof (team->ordered_release[0]);
  team = gomp_malloc (size);
  gomp_mutex_init (&team->work_share_lock);

  team->work_shares = gomp_malloc (4 * sizeof (struct gomp_work_share *));
  team->generation_mask = 3;
  team->oldest_live_gen = work_share == NULL;
  team->num_live_gen = work_share != NULL;
  team->work_shares[0] = work_share;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release[0] = &team->master_release;

  return team;
}
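The allocation pattern here is the single-block layout: the team structure and its nthreads-element ordered_release trailer come from one gomp_malloc call, with the trailer living directly after the struct. A minimal sketch of the same idea in plain C follows; the type tiny_team and the function tiny_team_new are illustrative names, not libgomp types.

/* Sketch only: allocate a struct and a per-thread trailer in one block.  */
#include <stdlib.h>

struct tiny_team
{
  unsigned nthreads;
  int *per_thread;              /* Points into the same allocation.  */
};

static struct tiny_team *
tiny_team_new (unsigned nthreads)
{
  struct tiny_team *t = malloc (sizeof (*t) + nthreads * sizeof (int));
  if (t == NULL)
    return NULL;
  t->nthreads = nthreads;
  t->per_thread = (int *) (t + 1);   /* Trailer starts right after the struct.  */
  for (unsigned i = 0; i < nthreads; i++)
    t->per_thread[i] = 0;
  return t;
}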
Example #5
struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}
Example #6
static struct goacc_thread *
goacc_new_thread (void)
{
  struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread));

#if defined HAVE_TLS || defined USE_EMUTLS
  goacc_tls_data = thr;
#else
  pthread_setspecific (goacc_tls_key, thr);
#endif

  pthread_setspecific (goacc_cleanup_key, thr);

  gomp_mutex_lock (&goacc_thread_lock);
  thr->next = goacc_threads;
  goacc_threads = thr;
  gomp_mutex_unlock (&goacc_thread_lock);

  return thr;
}
Example #7
struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
				      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_running_count = 0;

  return team;
}
Example #8
File: task.c Project: jtramm/gcc
void
GOMP_taskgroup_start (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;

  /* If team is NULL, all tasks are executed as
     GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
     taskgroup and their descendant tasks will be finished
     by the time GOMP_taskgroup_end is called.  */
  if (team == NULL)
    return;
  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
  taskgroup->prev = task->taskgroup;
  taskgroup->children = NULL;
  taskgroup->in_taskgroup_wait = false;
  taskgroup->cancelled = false;
  taskgroup->num_children = 0;
  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
  task->taskgroup = taskgroup;
}
Example #9
static void
allocate_thread_pool_reservoir (unsigned long count, unsigned long priority,
				unsigned long scheduler)
{
  struct gomp_thread_pool_reservoir *res;
  struct gomp_thread_pool *pools;
  unsigned long i;
  size_t size;

  res = gomp_thread_pool_reservoirs[scheduler];
  if (res != NULL)
    gomp_fatal ("Multiple thread pool reservoir initialization");
  size = sizeof (*res) + count * (sizeof(pools) + sizeof(*pools));
  pools = gomp_malloc (size);
  memset (pools, 0, size);
  res = (struct gomp_thread_pool_reservoir *) (pools + count);
  res->index = count;
  res->priority = priority;
  gomp_sem_init (&res->available, count);
  gomp_mutex_init (&res->lock);
  for (i = 0; i < count; ++i)
    res->pools[i] = &pools[i];
  gomp_thread_pool_reservoirs[scheduler] = res;
}
Example #10
File: proc.c Project: ds2dev/gcc
void
gomp_init_num_threads (void)
{
#ifdef HAVE_PTHREAD_AFFINITY_NP
#if defined (_SC_NPROCESSORS_CONF) && defined (CPU_ALLOC_SIZE)
  gomp_cpuset_size = sysconf (_SC_NPROCESSORS_CONF);
  gomp_cpuset_size = CPU_ALLOC_SIZE (gomp_cpuset_size);
#else
  gomp_cpuset_size = sizeof (cpu_set_t);
#endif

  gomp_cpusetp = (cpu_set_t *) gomp_malloc (gomp_cpuset_size);
  do
    {
      int ret = pthread_getaffinity_np (pthread_self (), gomp_cpuset_size,
					gomp_cpusetp);
      if (ret == 0)
	{
	  /* Count only the CPUs this process can use.  */
	  gomp_global_icv.nthreads_var
	    = gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp);
	  if (gomp_global_icv.nthreads_var == 0)
	    break;
	  gomp_get_cpuset_size = gomp_cpuset_size;
#ifdef CPU_ALLOC_SIZE
	  unsigned long i;
	  for (i = gomp_cpuset_size * 8; i; i--)
	    if (CPU_ISSET_S (i - 1, gomp_cpuset_size, gomp_cpusetp))
	      break;
	  gomp_cpuset_size = CPU_ALLOC_SIZE (i);
#endif
	  return;
	}
      if (ret != EINVAL)
	break;
#ifdef CPU_ALLOC_SIZE
      if (gomp_cpuset_size < sizeof (cpu_set_t))
	gomp_cpuset_size = sizeof (cpu_set_t);
      else
	gomp_cpuset_size = gomp_cpuset_size * 2;
      if (gomp_cpuset_size < 8 * sizeof (cpu_set_t))
	gomp_cpusetp
	  = (cpu_set_t *) gomp_realloc (gomp_cpusetp, gomp_cpuset_size);
      else
	{
	  /* Avoid gomp_fatal if too large memory allocation would be
	     requested, e.g. kernel returning EINVAL all the time.  */
	  void *p = realloc (gomp_cpusetp, gomp_cpuset_size);
	  if (p == NULL)
	    break;
	  gomp_cpusetp = (cpu_set_t *) p;
	}
#else
      break;
#endif
    }
  while (1);
  gomp_cpuset_size = 0;
  gomp_global_icv.nthreads_var = 1;
  free (gomp_cpusetp);
  gomp_cpusetp = NULL;
#endif
#if defined(__ANDROID__)
  gomp_global_icv.nthreads_var = sc_nprocessors_actu ();
#elif defined(_SC_NPROCESSORS_ONLN)
  gomp_global_icv.nthreads_var = sysconf (_SC_NPROCESSORS_ONLN);
#endif
}
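The fallback logic above amounts to: prefer the current thread's affinity mask, growing the cpuset buffer if the kernel reports it is too small, and otherwise fall back to the online-processor count. A standalone sketch of that query follows; it assumes glibc on Linux (CPU_COUNT and pthread_getaffinity_np are GNU extensions) and is not libgomp code.

/* Sketch only: count the CPUs this process may actually use.  */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <unistd.h>
#include <stdio.h>

int
main (void)
{
  cpu_set_t set;
  long n;

  if (pthread_getaffinity_np (pthread_self (), sizeof (set), &set) == 0)
    n = CPU_COUNT (&set);                /* CPUs in our affinity mask.  */
  else
    n = sysconf (_SC_NPROCESSORS_ONLN);  /* Fall back to online CPUs.  */

  printf ("usable CPUs: %ld\n", n);
  return 0;
}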
Example #11
void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  if (flags & 1)
    flags &= ~1;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & 8) && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_IFFALSE;
      task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (task.children != NULL)
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (task.children);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & 8)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->kind = GOMP_TASK_IFFALSE;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & 2) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  size_t ndepend = (uintptr_t) depend[0];
	  size_t nout = (uintptr_t) depend[1];
	  size_t i;
	  hash_entry_type ent;

	  task->depend_count = ndepend;
	  task->num_dependees = 0;
	  if (parent->depend_hash == NULL)
	    parent->depend_hash
	      = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
	  for (i = 0; i < ndepend; i++)
	    {
	      task->depend[i].addr = depend[2 + i];
	      task->depend[i].next = NULL;
	      task->depend[i].prev = NULL;
	      task->depend[i].task = task;
	      task->depend[i].is_in = i >= nout;
	      task->depend[i].redundant = false;
	      task->depend[i].redundant_out = false;

	      hash_entry_type *slot
		= htab_find_slot (&parent->depend_hash, &task->depend[i],
				  INSERT);
	      hash_entry_type out = NULL, last = NULL;
	      if (*slot)
		{
		  /* If multiple depends on the same task are the
		     same, all but the first one are redundant.
		     As inout/out come first, if any of them is
		     inout/out, it will win, which is the right
		     semantics.  */
		  if ((*slot)->task == task)
		    {
		      task->depend[i].redundant = true;
		      continue;
		    }
		  for (ent = *slot; ent; ent = ent->next)
		    {
		      if (ent->redundant_out)
			break;

		      last = ent;

		      /* depend(in:...) doesn't depend on earlier
			 depend(in:...).  */
		      if (i >= nout && ent->is_in)
			continue;

		      if (!ent->is_in)
			out = ent;

		      struct gomp_task *tsk = ent->task;
		      if (tsk->dependers == NULL)
			{
			  tsk->dependers
			    = gomp_malloc (sizeof (struct gomp_dependers_vec)
					   + 6 * sizeof (struct gomp_task *));
			  tsk->dependers->n_elem = 1;
			  tsk->dependers->allocated = 6;
			  tsk->dependers->elem[0] = task;
			  task->num_dependees++;
			  continue;
			}
		      /* We already have some other dependency on tsk
			 from earlier depend clause.  */
		      else if (tsk->dependers->n_elem
			       && (tsk->dependers->elem[tsk->dependers->n_elem
							- 1]
				   == task))
			continue;
		      else if (tsk->dependers->n_elem
			       == tsk->dependers->allocated)
			{
			  tsk->dependers->allocated
			    = tsk->dependers->allocated * 2 + 2;
			  tsk->dependers
			    = gomp_realloc (tsk->dependers,
					    sizeof (struct gomp_dependers_vec)
					    + (tsk->dependers->allocated
					       * sizeof (struct gomp_task *)));
			}
		      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
		      task->num_dependees++;
		    }
		  task->depend[i].next = *slot;
		  (*slot)->prev = &task->depend[i];
		}
	      *slot = &task->depend[i];

	      /* There is no need to store more than one depend({,in}out:)
		 task per address in the hash table chain for the purpose
		 of creation of deferred tasks, because each out
		 depends on all earlier outs, thus it is enough to record
		 just the last depend({,in}out:).  For depend(in:), we need
		 to keep all of the previous ones not terminated yet, because
		 a later depend({,in}out:) might need to depend on all of
		 them.  So, if the new task's clause is depend({,in}out:),
		 we know there is at most one other depend({,in}out:) clause
		 in the list (out).  For non-deferred tasks we want to see
		 all outs, so they are moved to the end of the chain,
		 after first redundant_out entry all following entries
		 should be redundant_out.  */
	      if (!task->depend[i].is_in && out)
		{
		  if (out != last)
		    {
		      out->next->prev = out->prev;
		      out->prev->next = out->next;
		      out->next = last->next;
		      out->prev = last;
		      last->next = out;
		      if (out->next)
			out->next->prev = out;
		    }
		  out->redundant_out = true;
		}
	    }
	  if (task->num_dependees)
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}
      if (parent->children)
	{
	  task->next_child = parent->children;
	  task->prev_child = parent->children->prev_child;
	  task->next_child->prev_child = task;
	  task->prev_child->next_child = task;
	}
      else
	{
	  task->next_child = task;
	  task->prev_child = task;
	}
      parent->children = task;
      if (taskgroup)
	{
	  if (taskgroup->children)
	    {
	      task->next_taskgroup = taskgroup->children;
	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
	      task->next_taskgroup->prev_taskgroup = task;
	      task->prev_taskgroup->next_taskgroup = task;
	    }
	  else
	    {
	      task->next_taskgroup = task;
	      task->prev_taskgroup = task;
	    }
	  taskgroup->children = task;
	}
      if (team->task_queue)
	{
	  task->next_queue = team->task_queue;
	  task->prev_queue = team->task_queue->prev_queue;
	  task->next_queue->prev_queue = task;
	  task->prev_queue->next_queue = task;
	}
      else
	{
	  task->next_queue = task;
	  task->prev_queue = task;
	  team->task_queue = task;
	}
      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}
Example #12
void
gomp_doacross_init (unsigned ncounts, long *counts, long chunk_size)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned int i, bits[MAX_COLLAPSED_BITS], num_bits = 0;
  unsigned long ent, num_ents, elt_sz, shift_sz;
  struct gomp_doacross_work_share *doacross;

  if (team == NULL || team->nthreads == 1)
    return;

  for (i = 0; i < ncounts; i++)
    {
      /* If any count is 0, GOMP_doacross_{post,wait} can't be called.  */
      if (counts[i] == 0)
	return;

      if (num_bits <= MAX_COLLAPSED_BITS)
	{
	  unsigned int this_bits;
	  if (counts[i] == 1)
	    this_bits = 1;
	  else
	    this_bits = __SIZEOF_LONG__ * __CHAR_BIT__
			- __builtin_clzl (counts[i] - 1);
	  if (num_bits + this_bits <= MAX_COLLAPSED_BITS)
	    {
	      bits[i] = this_bits;
	      num_bits += this_bits;
	    }
	  else
	    num_bits = MAX_COLLAPSED_BITS + 1;
	}
    }

  if (ws->sched == GFS_STATIC)
    num_ents = team->nthreads;
  else if (ws->sched == GFS_GUIDED)
    num_ents = counts[0];
  else
    num_ents = (counts[0] - 1) / chunk_size + 1;
  if (num_bits <= MAX_COLLAPSED_BITS)
    {
      elt_sz = sizeof (unsigned long);
      shift_sz = ncounts * sizeof (unsigned int);
    }
  else
    {
      elt_sz = sizeof (unsigned long) * ncounts;
      shift_sz = 0;
    }
  elt_sz = (elt_sz + 63) & ~63UL;

  doacross = gomp_malloc (sizeof (*doacross) + 63 + num_ents * elt_sz
			  + shift_sz);
  doacross->chunk_size = chunk_size;
  doacross->elt_sz = elt_sz;
  doacross->ncounts = ncounts;
  doacross->flattened = false;
  doacross->array = (unsigned char *)
		    ((((uintptr_t) (doacross + 1)) + 63 + shift_sz)
		     & ~(uintptr_t) 63);
  if (num_bits <= MAX_COLLAPSED_BITS)
    {
      unsigned int shift_count = 0;
      doacross->flattened = true;
      for (i = ncounts; i > 0; i--)
	{
	  doacross->shift_counts[i - 1] = shift_count;
	  shift_count += bits[i - 1];
	}
      for (ent = 0; ent < num_ents; ent++)
	*(unsigned long *) (doacross->array + ent * elt_sz) = 0;
    }
  else
    for (ent = 0; ent < num_ents; ent++)
      memset (doacross->array + ent * elt_sz, '\0',
	      sizeof (unsigned long) * ncounts);
  if (ws->sched == GFS_STATIC && chunk_size == 0)
    {
      unsigned long q = counts[0] / num_ents;
      unsigned long t = counts[0] % num_ents;
      doacross->boundary = t * (q + 1);
      doacross->q = q;
      doacross->t = t;
    }
  ws->doacross = doacross;
}
Example #13
File: task.c Project: jtramm/gcc
void
gomp_create_target_task (struct gomp_device_descr *devicep,
			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
			 size_t *sizes, unsigned short *kinds,
			 unsigned int flags, void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;

  if (depend != NULL)
    depend_size = ((uintptr_t) depend[0]
		   * sizeof (struct gomp_task_depend_entry));
  task = gomp_malloc (sizeof (*task) + depend_size
		      + sizeof (*ttask)
		      + mapnum * (sizeof (void *) + sizeof (size_t)
				  + sizeof (unsigned short)));
  gomp_init_task (task, parent, gomp_icv (false));
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  ttask = (struct gomp_target_task *) &task->depend[(uintptr_t) depend[0]];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  ttask->flags = flags;
  task->fn = gomp_target_task_fn;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
			|| (taskgroup && taskgroup->cancelled), 0))
    {
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return;
    }
  if (taskgroup)
    taskgroup->num_children++;
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
	{
	  gomp_mutex_unlock (&team->task_lock);
	  return;
	}
    }
  if (parent->children)
    {
      task->next_child = parent->children;
      task->prev_child = parent->children->prev_child;
      task->next_child->prev_child = task;
      task->prev_child->next_child = task;
    }
  else
    {
      task->next_child = task;
      task->prev_child = task;
    }
  parent->children = task;
  if (taskgroup)
    {
      /* If applicable, place task into its taskgroup.  */
      if (taskgroup->children)
	{
	  task->next_taskgroup = taskgroup->children;
	  task->prev_taskgroup = taskgroup->children->prev_taskgroup;
	  task->next_taskgroup->prev_taskgroup = task;
	  task->prev_taskgroup->next_taskgroup = task;
	}
      else
	{
	  task->next_taskgroup = task;
	  task->prev_taskgroup = task;
	}
      taskgroup->children = task;
    }
  if (team->task_queue)
    {
      task->next_queue = team->task_queue;
      task->prev_queue = team->task_queue->prev_queue;
      task->next_queue->prev_queue = task;
      task->prev_queue->next_queue = task;
    }
  else
    {
      task->next_queue = task;
      task->prev_queue = task;
      team->task_queue = task;
    }
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
	    < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
}
Example #14
File: task.c Project: jtramm/gcc
static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}
Example #15
File: task.c Project: jtramm/gcc
void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  /* FIXME, use priority.  */
  (void) priority;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
	  && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
			|| (flags & GOMP_TASK_FLAG_FINAL);
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (task.children != NULL)
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (task.children);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  gomp_task_handle_depend (task, parent, depend);
	  if (task->num_dependees)
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}
      if (parent->children)
	{
	  task->next_child = parent->children;
	  task->prev_child = parent->children->prev_child;
	  task->next_child->prev_child = task;
	  task->prev_child->next_child = task;
	}
      else
	{
	  task->next_child = task;
	  task->prev_child = task;
	}
      parent->children = task;
      if (taskgroup)
	{
	  /* If applicable, place task into its taskgroup.  */
	  if (taskgroup->children)
	    {
	      task->next_taskgroup = taskgroup->children;
	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
	      task->next_taskgroup->prev_taskgroup = task;
	      task->prev_taskgroup->next_taskgroup = task;
	    }
	  else
	    {
	      task->next_taskgroup = task;
	      task->prev_taskgroup = task;
	    }
	  taskgroup->children = task;
	}
      if (team->task_queue)
	{
	  task->next_queue = team->task_queue;
	  task->prev_queue = team->task_queue->prev_queue;
	  task->next_queue->prev_queue = task;
	  task->prev_queue->next_queue = task;
	}
      else
	{
	  task->next_queue = task;
	  task->prev_queue = task;
	  team->task_queue = task;
	}
      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}
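Both GOMP_task variants above place the copied argument block right after the task structure and round its address up to arg_align, which works because arg_align is a power of two. The rounding step can be isolated as below; align_up is an illustrative helper name, not a libgomp function.

/* Sketch only: round a pointer up to the next multiple of ALIGN,
   where ALIGN is a power of two.  Used as, e.g.,
   arg = align_up (task + 1, arg_align);  */
#include <stdint.h>
#include <assert.h>

static char *
align_up (void *p, uintptr_t align)
{
  assert (align && (align & (align - 1)) == 0);
  return (char *) (((uintptr_t) p + align - 1) & ~(align - 1));
}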
Example #16
File: task.c Project: jtramm/gcc
static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
			 void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  hash_entry_type ent;

  task->depend_count = ndepend;
  task->num_dependees = 0;
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].addr = depend[2 + i];
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].is_in = i >= nout;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
					      &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
	{
	  /* If multiple depends on the same task are the same, all but the
	     first one are redundant.  As inout/out come first, if any of them
	     is inout/out, it will win, which is the right semantics.  */
	  if ((*slot)->task == task)
	    {
	      task->depend[i].redundant = true;
	      continue;
	    }
	  for (ent = *slot; ent; ent = ent->next)
	    {
	      if (ent->redundant_out)
		break;

	      last = ent;

	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
	      if (i >= nout && ent->is_in)
		continue;

	      if (!ent->is_in)
		out = ent;

	      struct gomp_task *tsk = ent->task;
	      if (tsk->dependers == NULL)
		{
		  tsk->dependers
		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
				   + 6 * sizeof (struct gomp_task *));
		  tsk->dependers->n_elem = 1;
		  tsk->dependers->allocated = 6;
		  tsk->dependers->elem[0] = task;
		  task->num_dependees++;
		  continue;
		}
	      /* We already have some other dependency on tsk from earlier
		 depend clause.  */
	      else if (tsk->dependers->n_elem
		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
			   == task))
		continue;
	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
		{
		  tsk->dependers->allocated
		    = tsk->dependers->allocated * 2 + 2;
		  tsk->dependers
		    = gomp_realloc (tsk->dependers,
				    sizeof (struct gomp_dependers_vec)
				    + (tsk->dependers->allocated
				       * sizeof (struct gomp_task *)));
		}
	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
	      task->num_dependees++;
	    }
	  task->depend[i].next = *slot;
	  (*slot)->prev = &task->depend[i];
	}
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
	 address in the hash table chain for the purpose of creation of
	 deferred tasks, because each out depends on all earlier outs, thus it
	 is enough to record just the last depend({,in}out:).  For depend(in:),
	 we need to keep all of the previous ones not terminated yet, because
	 a later depend({,in}out:) might need to depend on all of them.  So, if
	 the new task's clause is depend({,in}out:), we know there is at most
	 one other depend({,in}out:) clause in the list (out).  For
	 non-deferred tasks we want to see all outs, so they are moved to the
	 end of the chain, after first redundant_out entry all following
	 entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
	{
	  if (out != last)
	    {
	      out->next->prev = out->prev;
	      out->prev->next = out->next;
	      out->next = last->next;
	      out->prev = last;
	      last->next = out;
	      if (out->next)
		out->next->prev = out;
	    }
	  out->redundant_out = true;
	}
    }
}
Example #17
void *
GOMP_PLUGIN_malloc (size_t size)
{
  return gomp_malloc (size);
}
Example #18
void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	       long arg_size, long arg_align, unsigned flags,
	       unsigned long num_tasks, int priority,
	       TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    return;

#ifdef TYPE_is_long
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
	return;
      s--;
    }
  else
    {
      if (start <= end)
	return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
	return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
	return;
      n = (start - end - step - 1) / -step;
    }
#endif

  TYPE task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
	num_tasks = ~0UL;
#endif
      if (num_tasks <= 1)
	{
	  num_tasks = 1;
	  task_step = end - start;
	}
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
	       && num_tasks != ~0UL
#endif
	      )
	{
	  UTYPE mul = num_tasks * grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mul != n)
	    {
	      task_step += step;
	      nfirst = n - mul - 1;
	    }
	}
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }
  else
    {
      if (num_tasks == 0)
	num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
	num_tasks = n;
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
	return;
    }
  else
    ialias_call (GOMP_taskgroup_start) ();

  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  struct gomp_task task[num_tasks];
	  struct gomp_task *parent = thr->task;
	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
	  char buf[num_tasks * arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  char *orig_arg = arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_init_task (&task[i], parent, gomp_icv (false));
	      task[i].priority = priority;
	      task[i].kind = GOMP_TASK_UNDEFERRED;
	      task[i].final_task = (thr->task && thr->task->final_task)
				   || (flags & GOMP_TASK_FLAG_FINAL);
	      if (thr->task)
		{
		  task[i].in_tied_task = thr->task->in_tied_task;
		  task[i].taskgroup = thr->task->taskgroup;
		}
	      thr->task = &task[i];
	      cpyfn (arg, data);
	      arg += arg_size;
	    }
	  arg = orig_arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      thr->task = &task[i];
	      ((TYPE *)arg)[0] = start;
	      start += task_step;
	      ((TYPE *)arg)[1] = start;
	      if (i == nfirst)
		task_step -= step;
	      fn (arg);
	      arg += arg_size;
	      if (!priority_queue_empty_p (&task[i].children_queue,
					   MEMMODEL_RELAXED))
		{
		  gomp_mutex_lock (&team->task_lock);
		  gomp_clear_parent (&task[i].children_queue);
		  gomp_mutex_unlock (&team->task_lock);
		}
	      gomp_end_task ();
	    }
	}
      else
	for (i = 0; i < num_tasks; i++)
	  {
	    struct gomp_task task;

	    gomp_init_task (&task, thr->task, gomp_icv (false));
	    task.priority = priority;
	    task.kind = GOMP_TASK_UNDEFERRED;
	    task.final_task = (thr->task && thr->task->final_task)
			      || (flags & GOMP_TASK_FLAG_FINAL);
	    if (thr->task)
	      {
		task.in_tied_task = thr->task->in_tied_task;
		task.taskgroup = thr->task->taskgroup;
	      }
	    thr->task = &task;
	    ((TYPE *)data)[0] = start;
	    start += task_step;
	    ((TYPE *)data)[1] = start;
	    if (i == nfirst)
	      task_step -= step;
	    fn (data);
	    if (!priority_queue_empty_p (&task.children_queue,
					 MEMMODEL_RELAXED))
	      {
		gomp_mutex_lock (&team->task_lock);
		gomp_clear_parent (&task.children_queue);
		gomp_mutex_unlock (&team->task_lock);
	      }
	    gomp_end_task ();
	  }
    }
  else
    {
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task
	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
	  tasks[i] = task;
	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
			  & ~(uintptr_t) (arg_align - 1));
	  gomp_init_task (task, parent, gomp_icv (false));
	  task->priority = priority;
	  task->kind = GOMP_TASK_UNDEFERRED;
	  task->in_tied_task = parent->in_tied_task;
	  task->taskgroup = taskgroup;
	  thr->task = task;
	  if (cpyfn)
	    {
	      cpyfn (arg, data);
	      task->copy_ctors_done = true;
	    }
	  else
	    memcpy (arg, data, arg_size);
	  ((TYPE *)arg)[0] = start;
	  start += task_step;
	  ((TYPE *)arg)[1] = start;
	  if (i == nfirst)
	    task_step -= step;
	  thr->task = parent;
	  task->kind = GOMP_TASK_WAITING;
	  task->fn = fn;
	  task->fn_data = arg;
	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
	}
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && cpyfn == NULL, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_finish_task (tasks[i]);
	      free (tasks[i]);
	    }
	  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
	    ialias_call (GOMP_taskgroup_end) ();
	  return;
	}
      if (taskgroup)
	taskgroup->num_children += num_tasks;
      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task = tasks[i];
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, priority,
				 PRIORITY_INSERT_BEGIN,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup)
	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   task, priority, PRIORITY_INSERT_BEGIN,
				   /*last_parent_depends_on=*/false,
				   task->parent_depends_on);
	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
				 PRIORITY_INSERT_END,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  ++team->task_count;
	  ++team->task_queued_count;
	}
      gomp_team_barrier_set_task_pending (&team->barrier);
      if (team->task_running_count + !parent->in_tied_task
	  < team->nthreads)
	{
	  do_wake = team->nthreads - team->task_running_count
		    - !parent->in_tied_task;
	  if ((unsigned long) do_wake > num_tasks)
	    do_wake = num_tasks;
	}
      else
	do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}