Example #1
int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t)h < n->host_start
	    || (uintptr_t)h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
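
A caller-side usage sketch for the function above, assuming only the standard OpenACC 2.0 API from <openacc.h> (the helper name and buffer are illustrative):

#include <assert.h>
#include <stddef.h>
#include <openacc.h>

/* Usage sketch: map a host buffer, confirm acc_is_present sees it,
   then unmap it.  Assumes the standard OpenACC API.  */
static void
presence_check (float *buf, size_t n)
{
  acc_copyin (buf, n * sizeof (float));             /* map [buf, buf+n) */
  assert (acc_is_present (buf, n * sizeof (float)));
  acc_delete (buf, n * sizeof (float));             /* unmap */
}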
Example #2
void
GOMP_critical_start (void)
{
  /* There is an implicit flush on entry to a critical region. */
  __atomic_thread_fence (MEMMODEL_RELEASE);
  gomp_mutex_lock (&default_lock);
}
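
GOMP_critical_start is the entry half of the pair GCC emits for an unnamed critical construct. A minimal sketch of the corresponding source-level code (the lowering described here is the usual one, but details vary by compiler version):

static int counter;

/* An unnamed critical region: the compiler brackets the body with
   GOMP_critical_start/GOMP_critical_end, serializing all threads on
   the global default_lock.  */
static void
bump (void)
{
  #pragma omp critical
  counter++;
}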
Example #3
void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}
Example #4
void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}
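
A hedged sketch of how acc_deviceptr and acc_hostptr invert each other on a mapped block, using only the standard <openacc.h> API (the function name is illustrative):

#include <assert.h>
#include <stddef.h>
#include <openacc.h>

/* Usage sketch: translate a mapped host address to its device
   counterpart and back again.  */
static void
round_trip (int *host, size_t n)
{
  acc_copyin (host, n * sizeof (int));
  void *dev = acc_deviceptr (host);    /* host -> device address */
  assert (dev != NULL);
  assert (acc_hostptr (dev) == host);  /* device -> host round trip */
  acc_delete (host, n * sizeof (int));
}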
Example #5
unsigned
GOMP_sections_start (unsigned count)
{
  struct gomp_thread *thr = gomp_thread ();
  long s, e, ret;

  if (gomp_work_share_start (false))
    {
      gomp_sections_init (thr->ts.work_share, count);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  if (gomp_iter_dynamic_next (&s, &e))
    ret = s;
  else
    ret = 0;
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  if (gomp_iter_dynamic_next_locked (&s, &e))
    ret = s;
  else
    ret = 0;
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
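
GOMP_sections_start is called on entry to a sections construct; the value it returns (1-based, 0 when no work remains) selects which section body the thread runs next. A sketch of the source construct, with placeholder bodies (lowering details vary by GCC version):

#include <stdio.h>

/* Each section body becomes a case dispatched on the index returned
   by GOMP_sections_start and, later, GOMP_sections_next.  */
static void
run_sections (void)
{
  #pragma omp parallel sections
  {
    #pragma omp section
    printf ("section 1\n");
    #pragma omp section
    printf ("section 2\n");
  }
}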
Example #6
static struct gomp_device_descr *
acc_init_1 (acc_device_t d)
{
  struct gomp_device_descr *base_dev, *acc_dev;
  int ndevs;

  base_dev = resolve_device (d, true);

  ndevs = base_dev->get_num_devices_func ();

  if (ndevs <= 0 || goacc_device_num >= ndevs)
    acc_dev_num_out_of_range (d, goacc_device_num, ndevs);

  acc_dev = &base_dev[goacc_device_num];

  gomp_mutex_lock (&acc_dev->lock);
  if (acc_dev->is_initialized)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("device already active");
    }

  gomp_init_device (acc_dev);
  gomp_mutex_unlock (&acc_dev->lock);

  return base_dev;
}
Example #7
static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset);

  gomp_mutex_unlock (&acc_dev->lock);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
}
Example #8
/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped.  We choose to check whether it is
   mapped, and if it is, to unmap it.  */
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      offset = d - k->tgt->tgt_start + k->tgt_offset;

      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  acc_dev->free_func (acc_dev->target_id, d);
}
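
A minimal pairing of acc_malloc with the acc_free above (standard API; the size is arbitrary). Memory from acc_malloc has no host mapping, so the lookup_dev probe finds nothing and acc_free falls straight through to free_func:

#include <openacc.h>

/* Usage sketch: raw device allocation and release.  */
static void
device_scratch (void)
{
  void *d = acc_malloc (4096);
  if (d)
    acc_free (d);
}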
Example #9
static bool
gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_GUIDED, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
Example #10
static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);
}
Example #11
static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (false))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
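
A sketch of the user-level loop that reaches an unsigned-long-long guided entry point like this one, via the GOMP_loop_ull_guided_start wrapper (which entry point GCC actually picks depends on version and loop shape; chunk size 64 is an arbitrary illustration):

/* Guided schedule over an unsigned long long iteration space.  */
static void
guided_scale (double *a, unsigned long long n)
{
  #pragma omp parallel for schedule(guided, 64)
  for (unsigned long long i = 0; i < n; i++)
    a[i] *= 2.0;
}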
Example #12
bool
GOMP_cancel (int which, bool do_cancel)
{
  if (!gomp_cancel_var)
    return false;

  if (!do_cancel)
    return ialias_call (GOMP_cancellation_point) (which);

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
    {
      /* In an orphaned worksharing region, all we want to cancel
	 is the current thread.  */
      if (team != NULL)
	team->work_share_cancelled = 1;
      return true;
    }
  else if (which & GOMP_CANCEL_TASKGROUP)
    {
      if (thr->task->taskgroup && !thr->task->taskgroup->cancelled)
	{
	  gomp_mutex_lock (&team->task_lock);
	  thr->task->taskgroup->cancelled = true;
	  gomp_mutex_unlock (&team->task_lock);
	}
      return true;
    }
  team->team_cancelled = 1;
  gomp_team_barrier_cancel (team);
  return true;
}
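
GOMP_cancel backs the cancel construct, with do_cancel carrying the value of its if clause; as the early return on gomp_cancel_var shows, it is a no-op unless cancellation is enabled (e.g. OMP_CANCELLATION=true). A sketch of the source side:

/* Usage sketch: cancel a worksharing loop on bad input.  */
static void
scan (const int *data, int n)
{
  #pragma omp parallel for
  for (int i = 0; i < n; i++)
    {
      if (data[i] < 0)
	{
	  #pragma omp cancel for
	}
      #pragma omp cancellation point for
    }
}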
Example #13
void
GOMP_parallel_end (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (__builtin_expect (icv->thread_limit_var != UINT_MAX, 0))
    {
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
      unsigned int nthreads = team ? team->nthreads : 1;
      gomp_team_end ();
      if (nthreads > 1)
	{
	  /* If not nested, there is just one thread in the
	     contention group left, no need for atomicity.  */
	  if (thr->ts.team == NULL)
	    thr->thread_pool->threads_busy = 1;
	  else
	    {
#ifdef HAVE_SYNC_BUILTINS
	      __sync_fetch_and_add (&thr->thread_pool->threads_busy,
				    1UL - nthreads);
#else
	      gomp_mutex_lock (&gomp_managed_threads_lock);
	      thr->thread_pool->threads_busy -= nthreads - 1;
	      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	    }
	}
    }
  else
    gomp_team_end ();
}
Example #14
void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain.  This prevents gomp_unmap_vars (via gomp_unmap_tgt)
	 from freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	{
	  if (n->tgt == t)
	    {
	      if (tp)
		tp->prev = t->prev;
	      else
		acc_dev->openacc.data_environ = t->prev;
	      break;
	    }
	}
    }

  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    t->device_descr->openacc.register_async_cleanup_func (t, async);

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}
Example #15
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain.  This prevents gomp_unmap_vars (via gomp_unmap_tgt)
         from freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);
}
Example #16
void
GOMP_critical_start (void)
{
  gomp_mutex_lock (&default_lock);

  /* OMP v3.1, 2.8.6 p81,l16 - "At entry to critical regions" */
  gomp_flush0();
}
Example #17
void
acc_shutdown (acc_device_t d)
{
  gomp_mutex_lock (&acc_device_lock);

  acc_shutdown_1 (d);

  gomp_mutex_unlock (&acc_device_lock);
}
Example #18
void
GOMP_atomic_start (void)
{
  gomp_mutex_lock (&atomic_lock);

  /* OMP v3.1, 2.8.6 p82,l1 - "At entry to atomic operation"
     TODO: Replace gomp_flush0 with a targeted flush containing just the 
     storage locations involved in the atomic construct */
  gomp_flush0();
}
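
GOMP_atomic_start/GOMP_atomic_end form the mutex-based fallback used when an atomic construct cannot be implemented with hardware atomics. A sketch of a construct that may take this path; whether it does is target-dependent (long double is chosen here only because many targets lack a native atomic for it):

/* Wide update that can fall back to GOMP_atomic_start/GOMP_atomic_end.  */
static long double total;

static void
accumulate (long double x)
{
  #pragma omp atomic
  total += x;
}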
Example #19
void
acc_set_device_num (int ord, acc_device_t d)
{
  struct gomp_device_descr *base_dev, *acc_dev;
  int num_devices;

  if (!cached_base_dev)
    gomp_init_targets_once ();

  if (ord < 0)
    ord = goacc_device_num;

  if ((int) d == 0)
    /* Set whatever device is being used by the current host thread to use
       device instance ORD.  It's unclear if this is supposed to affect other
       host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num).  */
    goacc_attach_host_thread_to_device (ord);
  else
    {
      gomp_mutex_lock (&acc_device_lock);

      cached_base_dev = base_dev = resolve_device (d);

      num_devices = base_dev->get_num_devices_func ();

      if (ord >= num_devices)
        gomp_fatal ("device %u out of range", ord);

      acc_dev = &base_dev[ord];

      gomp_mutex_lock (&acc_dev->lock);
      if (!acc_dev->is_initialized)
        gomp_init_device (acc_dev);
      gomp_mutex_unlock (&acc_dev->lock);

      gomp_mutex_unlock (&acc_device_lock);

      goacc_attach_host_thread_to_device (ord);
    }
  
  goacc_device_num = ord;
}
Example #20
void
acc_shutdown (acc_device_t d)
{
  gomp_init_targets_once ();

  gomp_mutex_lock (&acc_device_lock);

  acc_shutdown_1 (d);

  gomp_mutex_unlock (&acc_device_lock);
}
Example #21
void
gomp_team_barrier_cancel (struct gomp_team *team)
{
  gomp_mutex_lock (&team->task_lock);
  if (team->barrier.generation & BAR_CANCELLED)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  team->barrier.generation |= BAR_CANCELLED;
  gomp_mutex_unlock (&team->task_lock);
  futex_wake ((int *) &team->barrier.generation, INT_MAX);
}
Example #22
void
gomp_set_nest_lock_30 (omp_nest_lock_t *lock)
{
  void *me = gomp_icv (true);

  if (lock->owner != me)
    {
      gomp_mutex_lock (&lock->lock);
      lock->owner = me;
    }

  lock->count++;
}
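
The user-level view of the nestable lock this function implements, via the standard <omp.h> API: the owning thread may re-acquire the lock, and each set bumps the count the function maintains:

#include <omp.h>

/* Usage sketch: re-entrant acquisition of a nestable lock.  */
static omp_nest_lock_t nlock;

static void
nested_use (void)
{
  omp_init_nest_lock (&nlock);
  omp_set_nest_lock (&nlock);
  omp_set_nest_lock (&nlock);    /* owner re-entry: count -> 2 */
  omp_unset_nest_lock (&nlock);
  omp_unset_nest_lock (&nlock);  /* count -> 0, lock released */
  omp_destroy_nest_lock (&nlock);
}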
Example #23
static void
lazy_init_and_open (acc_device_t d)
{
  if (!base_dev)
    gomp_init_targets_once ();

  gomp_mutex_lock (&acc_device_lock);

  base_dev = lazy_init (d);

  lazy_open (-1);

  gomp_mutex_unlock (&acc_device_lock);
}
Example #24
void
acc_init (acc_device_t d)
{
  if (!cached_base_dev)
    gomp_init_targets_once ();

  gomp_mutex_lock (&acc_device_lock);

  cached_base_dev = acc_init_1 (d);

  gomp_mutex_unlock (&acc_device_lock);
  
  goacc_attach_host_thread_to_device (-1);
}
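
A typical explicit bring-up/teardown pairing around these entry points (sketch; acc_device_default stands in for whatever device type the program targets):

#include <openacc.h>

/* Usage sketch: explicit runtime initialization and shutdown.  */
static void
session (void)
{
  acc_init (acc_device_default);
  /* ... offloaded work ... */
  acc_shutdown (acc_device_default);
}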
Example #25
void
acc_set_device_num (int n, acc_device_t d)
{
  const struct gomp_device_descr *dev;
  int num_devices;

  if (!base_dev)
    gomp_init_targets_once ();

  if ((int) d == 0)
    {
      int i;

      /* A device setting of zero sets all device types on the system to use
         the Nth instance of that device type.  Only attempt it for initialized
	 devices though.  */
      for (i = acc_device_not_host + 1; i < _ACC_device_hwm; i++)
        {
	  dev = resolve_device ((acc_device_t) i);
	  if (dev && dev->is_initialized)
	    dev->openacc.set_device_num_func (n);
	}

      /* ...and for future calls to acc_init/acc_set_device_type, etc.  */
      goacc_device_num = n;
    }
  else
    {
      struct goacc_thread *thr = goacc_thread ();

      gomp_mutex_lock (&acc_device_lock);

      base_dev = lazy_init (d);

      num_devices = base_dev->get_num_devices_func ();

      if (n >= num_devices)
        gomp_fatal ("device %u out of range", n);

      /* If we're changing the device number, de-associate this thread with
	 the device (but don't close the device, since it may be in use by
	 other threads).  */
      if (thr && thr->dev && n != thr->dev->target_id)
	thr->dev = NULL;

      lazy_open (n);

      gomp_mutex_unlock (&acc_device_lock);
    }
}
Example #26
void
acc_init (acc_device_t d)
{
  if (!base_dev)
    gomp_init_targets_once ();

  gomp_mutex_lock (&acc_device_lock);

  base_dev = acc_init_1 (d);

  lazy_open (-1);

  gomp_mutex_unlock (&acc_device_lock);
}
Example #27
static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}
Example #28
static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}
Example #29
static void
lazy_open (int ord)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev;

  if (thr && thr->dev)
    {
      assert (ord < 0 || ord == thr->dev->target_id);
      return;
    }

  assert (base_dev);

  if (ord < 0)
    ord = goacc_device_num;

  /* The OpenACC 2.0 spec leaves the runtime's behaviour when an out-of-range
     device is requested as implementation-defined (4.2 ACC_DEVICE_NUM).
     We choose to raise an error in such a case.  */
  if (ord >= base_dev->get_num_devices_func ())
    gomp_fatal ("device %u does not exist", ord);

  if (!thr)
    thr = goacc_new_thread ();

  acc_dev = thr->dev = &base_dev[ord];

  assert (acc_dev->target_id == ord);

  thr->saved_bound_dev = NULL;
  thr->mapped_data = NULL;

  if (!acc_dev->openacc.target_data)
    acc_dev->openacc.target_data = acc_dev->openacc.open_device_func (ord);

  thr->target_tls
    = acc_dev->openacc.create_thread_data_func (acc_dev->openacc.target_data);

  acc_dev->openacc.async_set_async_func (acc_async_sync);

  struct gomp_memory_mapping *mem_map = &acc_dev->mem_map;
  gomp_mutex_lock (&mem_map->lock);
  if (!mem_map->is_initialized)
    gomp_init_tables (acc_dev, mem_map);
  gomp_mutex_unlock (&mem_map->lock);
}
Example #30
static void
delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (f & FLAG_COPYOUT)
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  acc_unmap_data (h);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", libfnname);
}