Exemple #1
0
void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (d != h)
        gomp_fatal ("cannot map data on shared-memory system");

      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
			   GOMP_MAP_VARS_OPENACC);
    }
  else
    {
      struct goacc_thread *thr = goacc_thread ();

      if (!d || !h || !s)
	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                    (void *)h, (int)s, (void *)d, (int)s);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
		      (int)s);
	}

      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
		      (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
			   &kinds, true, GOMP_MAP_VARS_OPENACC);
    }

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}
Exemple #2
0
static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset);

  gomp_mutex_unlock (&acc_dev->lock);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
}
Exemple #3
0
void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}
Exemple #4
0
/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped. We choose to check if it mapped,
   and if it is, to unmap it. */
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      offset = d - k->tgt->tgt_start + k->tgt_offset;

      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  acc_dev->free_func (acc_dev->target_id, d);
}
Exemple #5
0
void
acc_set_device_type (acc_device_t d)
{
  struct gomp_device_descr *base_dev, *acc_dev;
  struct goacc_thread *thr = goacc_thread ();

  gomp_mutex_lock (&acc_device_lock);

  if (!cached_base_dev)
    gomp_init_targets_once ();

  cached_base_dev = base_dev = resolve_device (d);
  acc_dev = &base_dev[goacc_device_num];

  gomp_mutex_lock (&acc_dev->lock);
  if (!acc_dev->is_initialized)
    gomp_init_device (acc_dev);
  gomp_mutex_unlock (&acc_dev->lock);

  gomp_mutex_unlock (&acc_device_lock);

  /* We're changing device type: invalidate the current thread's dev and
     base_dev pointers.  */
  if (thr && thr->base_dev != base_dev)
    {
      thr->base_dev = thr->dev = NULL;
      if (thr->mapped_data)
        gomp_fatal ("acc_set_device_type in 'acc data' region");
    }

  goacc_attach_host_thread_to_device (-1);
}
Exemple #6
0
void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory. */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	{
	  if (n->tgt == t)
	    {
	      if (tp)
		tp->prev = t->prev;
	      else
		acc_dev->openacc.data_environ = t->prev;
	      break;
	    }
	}
    }

  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    t->device_descr->openacc.register_async_cleanup_func (t, async);

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}
Exemple #7
0
void
GOACC_data_start (int device, const void *offload_table, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
  struct target_mem_desc *tgt;

  gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
	      __FUNCTION__, mapnum, hostaddrs, sizes, kinds);

  select_acc_device (device);

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Host fallback or 'do nothing'.  */
  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || host_fallback)
    {
      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
      tgt->prev = thr->mapped_data;
      thr->mapped_data = tgt;

      return;
    }

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
		       false);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
  tgt->prev = thr->mapped_data;
  thr->mapped_data = tgt;
}
Exemple #8
0
void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}
Exemple #9
0
int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (n && ((uintptr_t)h < n->host_start
	    || (uintptr_t)h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}
Exemple #10
0
static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);
}
Exemple #11
0
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
         freeing the device memory. */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);
}
Exemple #12
0
attribute_hidden void
goacc_restore_bind (void)
{
  struct goacc_thread *thr = goacc_thread ();

  thr->dev = thr->saved_bound_dev;
  thr->saved_bound_dev = NULL;
}
Exemple #13
0
void
GOACC_update (int device, const void *offload_table, size_t mapnum,
	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
	      int async, int num_waits, ...)
{
  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
  size_t i;

  select_acc_device (device);

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || host_fallback)
    return;

  if (num_waits > 0)
    {
      va_list ap;

      va_start (ap, num_waits);

      goacc_wait (async, num_waits, ap);

      va_end (ap);
    }

  acc_dev->openacc.async_set_async_func (async);

  for (i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;

      switch (kind)
	{
	case GOMP_MAP_POINTER:
	case GOMP_MAP_TO_PSET:
	  break;

	case GOMP_MAP_FORCE_TO:
	  acc_update_device (hostaddrs[i], sizes[i]);
	  break;

	case GOMP_MAP_FORCE_FROM:
	  acc_update_self (hostaddrs[i], sizes[i]);
	  break;

	default:
	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
	  break;
	}
    }

  acc_dev->openacc.async_set_async_func (acc_async_sync);
}
Exemple #14
0
void
acc_wait_all (void)
{
  struct goacc_thread *thr = goacc_thread ();

  if (!thr || !thr->dev)
    gomp_fatal ("no device active");

  thr->dev->openacc.async_wait_all_func ();
}
Exemple #15
0
void
acc_wait_async (int async1, int async2)
{
  struct goacc_thread *thr = goacc_thread ();

  if (!thr || !thr->dev)
    gomp_fatal ("no device active");

  thr->dev->openacc.async_wait_async_func (async1, async2);
}
Exemple #16
0
attribute_hidden void
goacc_save_and_set_bind (acc_device_t d)
{
  struct goacc_thread *thr = goacc_thread ();

  assert (!thr->saved_bound_dev);

  thr->saved_bound_dev = thr->dev;
  thr->dev = dispatchers[d];
}
Exemple #17
0
void
GOACC_data_end (void)
{
  struct goacc_thread *thr = goacc_thread ();
  struct target_mem_desc *tgt = thr->mapped_data;

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
  thr->mapped_data = tgt->prev;
  gomp_unmap_vars (tgt, true);
  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}
Exemple #18
0
void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
}
Exemple #19
0
int
acc_on_device (acc_device_t dev)
{
  struct goacc_thread *thr = goacc_thread ();

  if (thr && thr->dev
      && acc_device_type (thr->dev->type) == acc_device_host_nonshm)
    return dev == acc_device_host_nonshm || dev == acc_device_not_host;

  /* Just rely on the compiler builtin.  */
  return __builtin_acc_on_device (dev);
}
Exemple #20
0
void
acc_wait_all_async (int async)
{
  if (async < acc_async_sync)
    gomp_fatal ("invalid async argument: %d", async);

  struct goacc_thread *thr = goacc_thread ();

  if (!thr || !thr->dev)
    gomp_fatal ("no device active");

  thr->dev->openacc.async_wait_all_async_func (async);
}
Exemple #21
0
attribute_hidden void
goacc_lazy_initialize (void)
{
  struct goacc_thread *thr = goacc_thread ();

  if (thr && thr->dev)
    return;

  if (!cached_base_dev)
    acc_init (acc_device_default);
  else
    goacc_attach_host_thread_to_device (-1);
}
Exemple #22
0
void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  return thr->dev->alloc_func (thr->dev->target_id, s);
}
Exemple #23
0
void
acc_set_device_num (int n, acc_device_t d)
{
  const struct gomp_device_descr *dev;
  int num_devices;

  if (!base_dev)
    gomp_init_targets_once ();

  if ((int) d == 0)
    {
      int i;

      /* A device setting of zero sets all device types on the system to use
         the Nth instance of that device type.  Only attempt it for initialized
	 devices though.  */
      for (i = acc_device_not_host + 1; i < _ACC_device_hwm; i++)
        {
	  dev = resolve_device (d);
	  if (dev && dev->is_initialized)
	    dev->openacc.set_device_num_func (n);
	}

      /* ...and for future calls to acc_init/acc_set_device_type, etc.  */
      goacc_device_num = n;
    }
  else
    {
      struct goacc_thread *thr = goacc_thread ();

      gomp_mutex_lock (&acc_device_lock);

      base_dev = lazy_init (d);

      num_devices = base_dev->get_num_devices_func ();

      if (n >= num_devices)
        gomp_fatal ("device %u out of range", n);

      /* If we're changing the device number, de-associate this thread with
	 the device (but don't close the device, since it may be in use by
	 other threads).  */
      if (thr && thr->dev && n != thr->dev->target_id)
	thr->dev = NULL;

      lazy_open (n);

      gomp_mutex_unlock (&acc_device_lock);
    }
}
Exemple #24
0
static void
goacc_wait (int async, int num_waits, va_list ap)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  int i;

  assert (num_waits >= 0);

  if (async == acc_async_sync && num_waits == 0)
    {
      acc_wait_all ();
      return;
    }

  if (async == acc_async_sync && num_waits)
    {
      for (i = 0; i < num_waits; i++)
        {
          int qid = va_arg (ap, int);

          if (acc_async_test (qid))
            continue;

          acc_wait (qid);
        }
      return;
    }

  if (async == acc_async_noval && num_waits == 0)
    {
      acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
      return;
    }

  for (i = 0; i < num_waits; i++)
    {
      int qid = va_arg (ap, int);

      if (acc_async_test (qid))
	continue;

      /* If we're waiting on the same asynchronous queue as we're launching on,
         the queue itself will order work as required, so there's no need to
	 wait explicitly.  */
      if (qid != async)
	acc_dev->openacc.async_wait_async_func (qid, async);
    }
}
Exemple #25
0
int
acc_on_device (acc_device_t dev)
{
  struct goacc_thread *thr = goacc_thread ();

  /* We only want to appear to be the "host_nonshm" plugin from "offloaded"
     code -- i.e. within a parallel region.  Test a flag set by the
     openacc_parallel hook of the host_nonshm plugin to determine that.  */
  if (acc_get_device_type () == acc_device_host_nonshm
      && thr && thr->target_tls
      && ((struct nonshm_thread *)thr->target_tls)->nonshm_exec)
    return dev == acc_device_host_nonshm || dev == acc_device_not_host;

  /* For OpenACC, libgomp is only built for the host, so this is sufficient.  */
  return dev == acc_device_host || dev == acc_device_none;
}
Exemple #26
0
void
GOACC_wait (int async, int num_waits, ...)
{
  if (num_waits)
    {
      va_list ap;

      va_start (ap, num_waits);
      goacc_wait (async, num_waits, &ap);
      va_end (ap);
    }
  else if (async == acc_async_sync)
    acc_wait_all ();
  else if (async == acc_async_noval)
    goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
}
Exemple #27
0
static void
lazy_open (int ord)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev;

  if (thr && thr->dev)
    {
      assert (ord < 0 || ord == thr->dev->target_id);
      return;
    }

  assert (base_dev);

  if (ord < 0)
    ord = goacc_device_num;

  /* The OpenACC 2.0 spec leaves the runtime's behaviour when an out-of-range
     device is requested as implementation-defined (4.2 ACC_DEVICE_NUM).
     We choose to raise an error in such a case.  */
  if (ord >= base_dev->get_num_devices_func ())
    gomp_fatal ("device %u does not exist", ord);

  if (!thr)
    thr = goacc_new_thread ();

  acc_dev = thr->dev = &base_dev[ord];

  assert (acc_dev->target_id == ord);

  thr->saved_bound_dev = NULL;
  thr->mapped_data = NULL;

  if (!acc_dev->openacc.target_data)
    acc_dev->openacc.target_data = acc_dev->openacc.open_device_func (ord);

  thr->target_tls
    = acc_dev->openacc.create_thread_data_func (acc_dev->openacc.target_data);

  acc_dev->openacc.async_set_async_func (acc_async_sync);

  struct gomp_memory_mapping *mem_map = &acc_dev->mem_map;
  gomp_mutex_lock (&mem_map->lock);
  if (!mem_map->is_initialized)
    gomp_init_tables (acc_dev, mem_map);
  gomp_mutex_unlock (&mem_map->lock);
}
Exemple #28
0
static void
delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (f & FLAG_COPYOUT)
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  acc_unmap_data (h);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", libfnname);
}
Exemple #29
0
attribute_hidden void
goacc_lazy_initialize (void)
{
  struct goacc_thread *thr = goacc_thread ();

  if (thr && thr->dev)
    return;

  if (!base_dev)
    lazy_init_and_open (acc_device_default);
  else
    {
      gomp_mutex_lock (&acc_device_lock);
      lazy_open (-1);
      gomp_mutex_unlock (&acc_device_lock);
    }
}
Exemple #30
0
void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  return thr->dev->alloc_func (thr->dev->target_id, s);
}