/* Host-only variant of the OpenACC parallel entry point.

   FN is called directly with HOSTADDRS as its only argument, bracketed by
   goacc_save_and_set_bind (acc_device_host) and goacc_restore_bind ().
   DEVICE, MAPNUM, SIZES, KINDS, NUM_GANGS, NUM_WORKERS, VECTOR_LENGTH,
   ASYNC, NUM_WAITS and the variadic tail are accepted but are not read by
   this implementation.  */

void
GOACC_parallel (int device, void (*fn) (void *), size_t mapnum,
                void **hostaddrs, size_t *sizes, unsigned short *kinds,
                int num_gangs, int num_workers, int vector_length,
                int async, int num_waits, ...)
{
  /* Bind to the host device for the duration of the region.  */
  goacc_save_and_set_bind (acc_device_host);

  (*fn) (hostaddrs);

  /* Undo the binding established above.  */
  goacc_restore_bind ();
}
void GOACC_parallel (int device, void (*fn) (void *), const void *offload_table, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, int num_gangs, int num_workers, int vector_length, int async, int num_waits, ...) { bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; va_list ap; struct goacc_thread *thr; struct gomp_device_descr *acc_dev; struct target_mem_desc *tgt; void **devaddrs; unsigned int i; struct splay_tree_key_s k; splay_tree_key tgt_fn_key; void (*tgt_fn); if (num_gangs != 1) gomp_fatal ("num_gangs (%d) different from one is not yet supported", num_gangs); if (num_workers != 1) gomp_fatal ("num_workers (%d) different from one is not yet supported", num_workers); gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n", __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async); select_acc_device (device); thr = goacc_thread (); acc_dev = thr->dev; /* Host fallback if "if" clause is false or if the current device is set to the host. */ if (host_fallback) { goacc_save_and_set_bind (acc_device_host); fn (hostaddrs); goacc_restore_bind (); return; } else if (acc_device_type (acc_dev->type) == acc_device_host) { fn (hostaddrs); return; } va_start (ap, num_waits); if (num_waits > 0) goacc_wait (async, num_waits, ap); va_end (ap); acc_dev->openacc.async_set_async_func (async); if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) { k.host_start = (uintptr_t) fn; k.host_end = k.host_start + 1; gomp_mutex_lock (&acc_dev->mem_map.lock); tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k); gomp_mutex_unlock (&acc_dev->mem_map.lock); if (tgt_fn_key == NULL) gomp_fatal ("target function wasn't mapped"); tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start; } else tgt_fn = (void (*)) fn; tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, false); devaddrs = alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start + 
tgt->list[i]->tgt_offset); acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds, num_gangs, num_workers, vector_length, async, tgt); /* If running synchronously, unmap immediately. */ if (async < acc_async_noval) gomp_unmap_vars (tgt, true); else { gomp_copy_from_async (tgt); acc_dev->openacc.register_async_cleanup_func (tgt); } acc_dev->openacc.async_set_async_func (acc_async_sync); }
void GOACC_parallel_keyed (int device, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, ...) { bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; va_list ap; struct goacc_thread *thr; struct gomp_device_descr *acc_dev; struct target_mem_desc *tgt; void **devaddrs; unsigned int i; struct splay_tree_key_s k; splay_tree_key tgt_fn_key; void (*tgt_fn); int async = GOMP_ASYNC_SYNC; unsigned dims[GOMP_DIM_MAX]; unsigned tag; #ifdef HAVE_INTTYPES_H gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); #else gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); #endif goacc_lazy_initialize (); thr = goacc_thread (); acc_dev = thr->dev; /* Host fallback if "if" clause is false or if the current device is set to the host. */ if (host_fallback) { goacc_save_and_set_bind (acc_device_host); fn (hostaddrs); goacc_restore_bind (); return; } else if (acc_device_type (acc_dev->type) == acc_device_host) { fn (hostaddrs); return; } va_start (ap, kinds); /* TODO: This will need amending when device_type is implemented. */ while ((tag = va_arg (ap, unsigned)) != 0) { if (GOMP_LAUNCH_DEVICE (tag)) gomp_fatal ("device_type '%d' offload parameters, libgomp is too old", GOMP_LAUNCH_DEVICE (tag)); switch (GOMP_LAUNCH_CODE (tag)) { case GOMP_LAUNCH_DIM: { unsigned mask = GOMP_LAUNCH_OP (tag); for (i = 0; i != GOMP_DIM_MAX; i++) if (mask & GOMP_DIM_MASK (i)) dims[i] = va_arg (ap, unsigned); } break; case GOMP_LAUNCH_ASYNC: { /* Small constant values are encoded in the operand. 
*/ async = GOMP_LAUNCH_OP (tag); if (async == GOMP_LAUNCH_OP_MAX) async = va_arg (ap, unsigned); break; } case GOMP_LAUNCH_WAIT: { unsigned num_waits = GOMP_LAUNCH_OP (tag); if (num_waits) goacc_wait (async, num_waits, &ap); break; } default: gomp_fatal ("unrecognized offload code '%d'," " libgomp is too old", GOMP_LAUNCH_CODE (tag)); } } va_end (ap); acc_dev->openacc.async_set_async_func (async); if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) { k.host_start = (uintptr_t) fn; k.host_end = k.host_start + 1; gomp_mutex_lock (&acc_dev->lock); tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k); gomp_mutex_unlock (&acc_dev->lock); if (tgt_fn_key == NULL) gomp_fatal ("target function wasn't mapped"); tgt_fn = (void (*)) tgt_fn_key->tgt_offset; } else tgt_fn = (void (*)) fn; tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); devaddrs = gomp_alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start + tgt->list[i].key->tgt_offset); acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, async, dims, tgt); /* If running synchronously, unmap immediately. */ if (async < acc_async_noval) gomp_unmap_vars (tgt, true); else { gomp_copy_from_async (tgt); acc_dev->openacc.register_async_cleanup_func (tgt); } acc_dev->openacc.async_set_async_func (acc_async_sync); }