Example #1
bool
gomp_affinity_init_level (int level, unsigned long count, bool quiet)
{
  unsigned long i, max = 8 * gomp_cpuset_size;

  if (gomp_cpusetp)
    {
      unsigned long maxcount
	= gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp);
      if (count > maxcount)
	count = maxcount;
    }
  gomp_places_list = gomp_affinity_alloc (count, quiet);
  gomp_places_list_len = 0;
  if (gomp_places_list == NULL)
    return false;
  /* SMT (threads).  */
  if (level == 1)
    {
      for (i = 0; i < max && gomp_places_list_len < count; i++)
	if (CPU_ISSET_S (i, gomp_cpuset_size, gomp_cpusetp))
	  {
	    gomp_affinity_init_place (gomp_places_list[gomp_places_list_len]);
	    gomp_affinity_add_cpus (gomp_places_list[gomp_places_list_len],
				    i, 1, 0, true);
	    ++gomp_places_list_len;
	  }
      return true;
    }
  else
    {
      char name[sizeof ("/sys/devices/system/cpu/cpu/topology/"
			"thread_siblings_list") + 3 * sizeof (unsigned long)];
      size_t prefix_len = sizeof ("/sys/devices/system/cpu/cpu") - 1;
      cpu_set_t *copy = gomp_alloca (gomp_cpuset_size);
      FILE *f;
      char *line = NULL;
      size_t linelen = 0;

      memcpy (name, "/sys/devices/system/cpu/cpu", prefix_len);
      memcpy (copy, gomp_cpusetp, gomp_cpuset_size);
      for (i = 0; i < max && gomp_places_list_len < count; i++)
	if (CPU_ISSET_S (i, gomp_cpuset_size, copy))
	  {
	    sprintf (name + prefix_len, "%lu/topology/%s_siblings_list",
		     i, level == 2 ? "thread" : "core");
	    f = fopen (name, "r");
	    if (f != NULL)
	      {
		if (getline (&line, &linelen, f) > 0)
		  {
		    char *p = line;
		    bool seen_i = false;
		    void *pl = gomp_places_list[gomp_places_list_len];
		    gomp_affinity_init_place (pl);
		    while (*p && *p != '\n')
		      {
			unsigned long first, last;
			errno = 0;
			first = strtoul (p, &p, 10);
			if (errno)
			  break;
			last = first;
			if (*p == '-')
			  {
			    errno = 0;
			    last = strtoul (p + 1, &p, 10);
			    if (errno || last < first)
			      break;
			  }
			for (; first <= last; first++)
			  if (CPU_ISSET_S (first, gomp_cpuset_size, copy)
			      && gomp_affinity_add_cpus (pl, first, 1, 0,
							 true))
			    {
			      CPU_CLR_S (first, gomp_cpuset_size, copy);
			      if (first == i)
				seen_i = true;
			    }
			if (*p == ',')
			  ++p;
		      }
		    if (seen_i)
		      gomp_places_list_len++;
		  }
		fclose (f);
	      }
	  }
      if (gomp_places_list_len == 0)
	{
	  if (!quiet)
	    gomp_error ("Error reading %s topology",
			level == 2 ? "core" : "socket");
	  free (gomp_places_list);
	  gomp_places_list = NULL;
	  return false;
	}
      return true;
    }
  return false;
}
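
The level > 1 branch above revolves around parsing sysfs sibling lists such as "0-3,8-11" from /sys/devices/system/cpu/cpuN/topology. Below is a minimal, self-contained sketch of that strtoul range-and-comma loop; parse_cpu_list and print_cpu are illustrative names, not libgomp functions, and the extra end == p check (absent above, where the sysfs input is trusted) guards against malformed input.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse a sysfs-style CPU list ("0-3,8-11\n") and invoke CB for every
   CPU number it names.  Returns 0 on success, -1 on malformed input.
   Mirrors the inner parsing loop of gomp_affinity_init_level.  */
static int
parse_cpu_list (const char *p, void (*cb) (unsigned long cpu))
{
  while (*p && *p != '\n')
    {
      char *end;
      unsigned long first, last;

      errno = 0;
      first = strtoul (p, &end, 10);
      if (errno || end == p)
	return -1;
      p = end;
      last = first;
      if (*p == '-')
	{
	  errno = 0;
	  last = strtoul (p + 1, &end, 10);
	  if (errno || last < first)
	    return -1;
	  p = end;
	}
      for (; first <= last; first++)
	cb (first);
      if (*p == ',')
	++p;
    }
  return 0;
}

static void
print_cpu (unsigned long cpu)
{
  printf ("cpu %lu\n", cpu);
}

int
main (void)
{
  /* Typical contents of a .../topology/core_siblings_list file.  */
  return parse_cpu_list ("0-3,8-11\n", print_cpu) ? 1 : 0;
}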
Example #2
File: team.c  Project: 0mp/freebsd
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 struct gomp_work_share *work_share)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_team *team;
  bool nested;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;

  team = new_team (nthreads, work_share);

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.work_share = work_share;
  thr->ts.team_id = 0;
  thr->ts.work_share_generation = 0;
  thr->ts.static_trip = 0;

  if (nthreads == 1)
    return;

  i = 1;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = gomp_threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_barrier_init (&gomp_threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_barrier_reinit (&gomp_threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the 
	 team will exit.  */
      gomp_threads_used = nthreads;

      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  nthr = gomp_threads[i];
	  nthr->ts.team = team;
	  nthr->ts.work_share = work_share;
	  nthr->ts.team_id = i;
	  nthr->ts.work_share_generation = 0;
	  nthr->ts.static_trip = 0;
	  nthr->fn = fn;
	  nthr->data = data;
	  team->ordered_release[i] = &nthr->release;
	}

      if (i == nthreads)
	goto do_release;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads is rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= gomp_threads_size)
	{
	  gomp_threads_size = nthreads + 1;
	  gomp_threads
	    = gomp_realloc (gomp_threads,
			    gomp_threads_size
			    * sizeof (struct gomp_thread *));
	}
    }

  attr = &gomp_thread_attr;
  if (gomp_cpu_affinity != NULL)
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads-i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i, ++start_data)
    {
      pthread_t pt;
      int err;

      start_data->ts.team = team;
      start_data->ts.work_share = work_share;
      start_data->ts.team_id = i;
      start_data->ts.work_share_generation = 0;
      start_data->ts.static_trip = 0;
      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->nested = nested;

      if (gomp_cpu_affinity != NULL)
	gomp_init_thread_affinity (attr);

      err = pthread_create (&pt, attr, gomp_thread_start, start_data);
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (gomp_cpu_affinity != NULL)
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &gomp_threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  */
  if (nthreads < old_threads_used)
    gomp_barrier_reinit (&gomp_threads_dock, nthreads);
}
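
gomp_team_start hands each new thread a pointer into a block of per-thread start data, creates the threads detached, and uses a barrier (the "dock") so the team only proceeds once every member has arrived. A minimal sketch of that launch pattern with plain pthreads follows; struct start_data, worker and NTHREADS are illustrative names rather than libgomp's, and the real library also reuses docked idle threads instead of creating a fresh set every time.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NTHREADS 4	/* team size, the creating thread included */

/* Per-thread start data: written by the creator before pthread_create,
   read by the new thread before it arrives at the dock.  */
struct start_data
{
  unsigned team_id;
  pthread_barrier_t *dock;
};

static void *
worker (void *xdata)
{
  struct start_data *sd = xdata;

  printf ("thread %u ready\n", sd->team_id);
  /* Wait until the whole team has been launched, like gomp_threads_dock.  */
  pthread_barrier_wait (sd->dock);
  return NULL;
}

int
main (void)
{
  struct start_data sd[NTHREADS];
  pthread_barrier_t dock;
  pthread_attr_t attr;
  unsigned i;

  pthread_barrier_init (&dock, NULL, NTHREADS);
  pthread_attr_init (&attr);
  /* Team threads are never joined, so create them detached.  */
  pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);

  /* The creator is team member 0; launch the remaining members.  */
  for (i = 1; i < NTHREADS; i++)
    {
      pthread_t pt;
      int err;

      sd[i].team_id = i;
      sd[i].dock = &dock;
      err = pthread_create (&pt, &attr, worker, &sd[i]);
      if (err != 0)
	{
	  fprintf (stderr, "Thread creation failed: %s\n", strerror (err));
	  exit (1);
	}
    }
  pthread_attr_destroy (&attr);

  /* Release the team by arriving at the dock ourselves.  */
  pthread_barrier_wait (&dock);
  return 0;
}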
Example #3
void
GOACC_parallel_keyed (int device, void (*fn) (void *),
		      size_t mapnum, void **hostaddrs, size_t *sizes,
		      unsigned short *kinds, ...)
{
  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
  va_list ap;
  struct goacc_thread *thr;
  struct gomp_device_descr *acc_dev;
  struct target_mem_desc *tgt;
  void **devaddrs;
  unsigned int i;
  struct splay_tree_key_s k;
  splay_tree_key tgt_fn_key;
  void (*tgt_fn);
  int async = GOMP_ASYNC_SYNC;
  unsigned dims[GOMP_DIM_MAX];
  unsigned tag;

#ifdef HAVE_INTTYPES_H
  gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
#else
  gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
#endif
  goacc_lazy_initialize ();

  thr = goacc_thread ();
  acc_dev = thr->dev;

  /* Host fallback if "if" clause is false or if the current device is set to
     the host.  */
  if (host_fallback)
    {
      goacc_save_and_set_bind (acc_device_host);
      fn (hostaddrs);
      goacc_restore_bind ();
      return;
    }
  else if (acc_device_type (acc_dev->type) == acc_device_host)
    {
      fn (hostaddrs);
      return;
    }

  va_start (ap, kinds);
  /* TODO: This will need amending when device_type is implemented.  */
  while ((tag = va_arg (ap, unsigned)) != 0)
    {
      if (GOMP_LAUNCH_DEVICE (tag))
	gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
		    GOMP_LAUNCH_DEVICE (tag));

      switch (GOMP_LAUNCH_CODE (tag))
	{
	case GOMP_LAUNCH_DIM:
	  {
	    unsigned mask = GOMP_LAUNCH_OP (tag);

	    for (i = 0; i != GOMP_DIM_MAX; i++)
	      if (mask & GOMP_DIM_MASK (i))
		dims[i] = va_arg (ap, unsigned);
	  }
	  break;

	case GOMP_LAUNCH_ASYNC:
	  {
	    /* Small constant values are encoded in the operand.  */
	    async = GOMP_LAUNCH_OP (tag);

	    if (async == GOMP_LAUNCH_OP_MAX)
	      async = va_arg (ap, unsigned);
	    break;
	  }

	case GOMP_LAUNCH_WAIT:
	  {
	    unsigned num_waits = GOMP_LAUNCH_OP (tag);

	    if (num_waits)
	      goacc_wait (async, num_waits, &ap);
	    break;
	  }

	default:
	  gomp_fatal ("unrecognized offload code '%d',"
		      " libgomp is too old", GOMP_LAUNCH_CODE (tag));
	}
    }
  va_end (ap);
  
  acc_dev->openacc.async_set_async_func (async);

  if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
    {
      k.host_start = (uintptr_t) fn;
      k.host_end = k.host_start + 1;
      gomp_mutex_lock (&acc_dev->lock);
      tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
      gomp_mutex_unlock (&acc_dev->lock);

      if (tgt_fn_key == NULL)
	gomp_fatal ("target function wasn't mapped");

      tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
    }
  else
    tgt_fn = (void (*)) fn;

  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
		       GOMP_MAP_VARS_OPENACC);

  devaddrs = gomp_alloca (sizeof (void *) * mapnum);
  for (i = 0; i < mapnum; i++)
    devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
			    + tgt->list[i].key->tgt_offset);

  acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
			      async, dims, tgt);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (tgt, true);
  else
    {
      gomp_copy_from_async (tgt);
      acc_dev->openacc.register_async_cleanup_func (tgt);
    }

  acc_dev->openacc.async_set_async_func (acc_async_sync);
}
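
The va_arg loop above decodes a stream of launch tags terminated by 0: each tag packs a code and a small operand, and an operand equal to GOMP_LAUNCH_OP_MAX signals that the real value follows as a separate argument. The sketch below shows the same decoding pattern with a toy encoding; the TAG_* macros, the bit layout and the CODE_* values are made up for illustration and are not the real GOMP_LAUNCH_* constants from gomp-constants.h.

#include <stdarg.h>
#include <stdio.h>

/* Toy tag layout: code in the top 8 bits, operand in the low 24 bits.
   An operand of OP_MAX means "read the real value from the next va_arg",
   mirroring the GOMP_LAUNCH_OP_MAX convention used above.  */
#define TAG_CODE(t)	((t) >> 24)
#define TAG_OP(t)	((t) & 0xffffff)
#define OP_MAX		0xffffffu
#define TAG_PACK(c, op)	(((unsigned) (c) << 24) | (op))

enum { CODE_END = 0, CODE_DIM = 1, CODE_ASYNC = 2 };

static void
decode_tags (int dummy, ...)
{
  va_list ap;
  unsigned tag;

  va_start (ap, dummy);
  while ((tag = va_arg (ap, unsigned)) != 0)
    switch (TAG_CODE (tag))
      {
      case CODE_DIM:
	printf ("dim operand %u\n", TAG_OP (tag));
	break;
      case CODE_ASYNC:
	{
	  unsigned async = TAG_OP (tag);
	  /* Too big to pack into the tag, so it came as its own argument.  */
	  if (async == OP_MAX)
	    async = va_arg (ap, unsigned);
	  printf ("async queue %u\n", async);
	  break;
	}
      default:
	printf ("unknown code %u\n", TAG_CODE (tag));
	break;
      }
  va_end (ap);
}

int
main (void)
{
  /* A dim tag with a packed operand, an async tag whose operand overflowed
     into a separate argument, then the terminating zero tag.  */
  decode_tags (0, TAG_PACK (CODE_DIM, 32),
	       TAG_PACK (CODE_ASYNC, OP_MAX), 1000000u,
	       TAG_PACK (CODE_END, 0));
  return 0;
}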