static void verify_taskgroup_queue (struct gomp_task *task) { struct gomp_taskgroup *taskgroup = task->taskgroup; if (!taskgroup) return; bool seen_tied = false; bool found = false; struct gomp_task *t = taskgroup->children; while (1) { if (t == task) found = true; if (t->kind == GOMP_TASK_WAITING && seen_tied) gomp_fatal ("verify_taskgroup_queue: WAITING task after TIED"); if (t->kind == GOMP_TASK_TIED) seen_tied = true; t = t->next_taskgroup; if (t == taskgroup->children) break; } if (!found) gomp_fatal ("verify_taskgroup_queue: child not found in parent queue"); }
/* Verify the invariants of priority_list LIST.  TYPE tells us how the
   nodes link together; with CHECK_DEPS, additionally verify the
   parent_depends_on ordering among WAITING tasks.

   Invariants checked:
     - no WAITING task appears after a TIED (or later-kind) task;
     - with CHECK_DEPS, among WAITING tasks every parent_depends_on
       entry precedes all !parent_depends_on entries.

   NOTE(review): assumes LIST->tasks is a non-empty circular list --
   confirm all callers guarantee this, otherwise the first
   priority_node_to_task dereferences NULL.  */
static void
priority_list_verify (enum priority_queue_type type,
		      struct priority_list *list, bool check_deps)
{
  bool seen_tied = false;
  bool seen_plain_waiting = false;
  struct priority_node *p = list->tasks;
  while (1)
    {
      struct gomp_task *t = priority_node_to_task (type, p);
      if (seen_tied && t->kind == GOMP_TASK_WAITING)
	gomp_fatal ("priority_queue_verify: WAITING task after TIED");
      if (t->kind >= GOMP_TASK_TIED)
	seen_tied = true;
      else if (check_deps && t->kind == GOMP_TASK_WAITING)
	{
	  if (t->parent_depends_on)
	    {
	      if (seen_plain_waiting)
		gomp_fatal ("priority_queue_verify: "
			    "parent_depends_on after !parent_depends_on");
	    }
	  else
	    seen_plain_waiting = true;
	}
      p = p->next;
      /* Circular list: stop once we wrap around to the head.  */
      if (p == list->tasks)
	break;
    }
}
/* Resolve and initialize device type D, returning the base device
   descriptor.  Aborts via gomp_fatal when the device is unsupported,
   out of range, or already active.  */
static struct gomp_device_descr *
acc_init_1 (acc_device_t d)
{
  struct gomp_device_descr *base_dev, *acc_dev;
  int ndevs;

  base_dev = resolve_device (d);

  /* Test BASE_DEV before using it: the original called
     get_num_devices_func through BASE_DEV first and only then checked
     it for NULL, dereferencing a null pointer whenever no device
     resolved.  */
  if (!base_dev)
    gomp_fatal ("device %s not supported", name_of_acc_device_t (d));

  ndevs = base_dev->get_num_devices_func ();
  if (ndevs <= 0 || goacc_device_num >= ndevs)
    gomp_fatal ("device %s not supported", name_of_acc_device_t (d));

  acc_dev = &base_dev[goacc_device_num];

  gomp_mutex_lock (&acc_dev->lock);
  if (acc_dev->is_initialized)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("device already active");
    }

  gomp_init_device (acc_dev);
  gomp_mutex_unlock (&acc_dev->lock);

  return base_dev;
}
/* Remove the mapping previously established (e.g. by acc_map_data) for
   host address H.  H must be exactly the start of a mapped block.  */
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  /* H must be the start of the mapping, not an interior pointer.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory. */
      t->tgt_end = 0;
      t->to_free = 0;

      /* Unlink N->tgt from the device's data environment list.  Note
	 that T is reused as the loop cursor here; after the break it
	 points at the found descriptor (== N->tgt).  */
      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;
	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  /* NOTE(review): if the unlink loop runs and N->tgt is somehow absent
     from data_environ, T is NULL here and gomp_unmap_vars dereferences
     it -- presumably the descriptor is always on the list; confirm.  */
  gomp_unmap_vars (t, true);
}
/* Emit a suitable error if no device of a particular type is available,
   or the given device number is out-of-range.  D is the requested
   device type, ORD the requested device ordinal, NDEVS how many devices
   of that type exist.  Does not return.  */
static void
acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs)
{
  if (ndevs == 0)
    gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d));
  else
    /* ORD is an int, so use %d: the original passed it to %u, a
       format/argument type mismatch (undefined behavior in C).  */
    gomp_fatal ("device %d out of range", ord);
}
/* Establish a mapping from host address H to device address D over S
   bytes and record it in the device's OpenACC data environment.  */
void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* On shared-memory systems the only valid mapping is the
	 identity.  */
      if (d != h)
	gomp_fatal ("cannot map data on shared-memory system");

      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
			   GOMP_MAP_VARS_OPENACC);
    }
  else
    {
      if (!d || !h || !s)
	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
		    (void *)h, (int)s, (void *)d, (int)s);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("host address [%p, +%d] is already mapped",
		      (void *)h, (int)s);
	}

      /* The original redeclared THR here, shadowing the outer variable
	 (-Wshadow) just to reach thr->dev; ACC_DEV is the same
	 descriptor, so use it directly.  */
      if (lookup_dev (acc_dev->openacc.data_environ, d, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("device address [%p, +%d] is already mapped",
		      (void *)d, (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
			   &kinds, true, GOMP_MAP_VARS_OPENACC);
    }

  /* Push the new descriptor onto the device's data environment.  */
  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}
/* Map an OpenACC device type D to a dispatcher entry.  acc_device_default
   tries the GOACC_DEVICE_TYPE-named device first, then any non-host
   device, then falls back to the host; acc_device_not_host scans for the
   first available non-host device.  Aborts via gomp_fatal when nothing
   suitable exists.  */
static struct gomp_device_descr *
resolve_device (acc_device_t d)
{
  acc_device_t d_arg = d;

  switch (d)
    {
    case acc_device_default:
      {
	if (goacc_device_type)
	  {
	    /* Lookup the named device.  */
	    while (++d != _ACC_device_hwm)
	      if (dispatchers[d]
		  && !strcasecmp (goacc_device_type,
				  get_openacc_name (dispatchers[d]->name))
		  && dispatchers[d]->get_num_devices_func () > 0)
		goto found;

	    gomp_fatal ("device type %s not supported", goacc_device_type);
	  }

	/* No default device specified, so start scanning for any
	   non-host device that is available.  */
	d = acc_device_not_host;
      }
      /* FALLTHROUGH */

    case acc_device_not_host:
      /* Find the first available device after acc_device_not_host.  */
      while (++d != _ACC_device_hwm)
	if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
	  goto found;
      /* A "default" request may still fall back to the host.  */
      if (d_arg == acc_device_default)
	{
	  d = acc_device_host;
	  goto found;
	}
      gomp_fatal ("no device found");
      break;

    case acc_device_host:
      break;

    default:
      /* Reject D == _ACC_device_hwm as well: the original used '>',
	 which let that value through and indexed one past the end of
	 DISPATCHERS at the return below.  */
      if (d >= _ACC_device_hwm)
	gomp_fatal ("device %u out of range", (unsigned)d);
      break;
    }
 found:
  assert (d != acc_device_none
	  && d != acc_device_default
	  && d != acc_device_not_host);

  return dispatchers[d];
}
/* Shut down the devices initialized for device type D: free each
   thread's target TLS data, close every open device, tear down the
   memory maps, and finalize BASE_DEV.  */
static void
acc_shutdown_1 (acc_device_t d)
{
  struct goacc_thread *walk;

  /* We don't check whether d matches the actual device found, because
     OpenACC 2.0 (3.2.12) says the parameters to the init and this
     call must match (for the shutdown call anyway, it's silent on
     others).  */
  if (!base_dev)
    gomp_fatal ("no device initialized");
  if (d != init_key)
    gomp_fatal ("device %u(%u) is initialized",
		(unsigned) init_key, (unsigned) base_dev->type);

  gomp_mutex_lock (&goacc_thread_lock);

  /* Free target-specific TLS data and close all devices.  */
  for (walk = goacc_threads; walk != NULL; walk = walk->next)
    {
      if (walk->target_tls)
	base_dev->openacc.destroy_thread_data_func (walk->target_tls);

      walk->target_tls = NULL;

      /* This would mean the user is shutting down OpenACC in the middle
	 of an "acc data" pragma.  Likely not intentional.  */
      if (walk->mapped_data)
	gomp_fatal ("shutdown in 'acc data' region");

      if (walk->dev)
	{
	  void *target_data = walk->dev->openacc.target_data;
	  if (walk->dev->openacc.close_device_func (target_data) < 0)
	    gomp_fatal ("failed to close device");
	  walk->dev->openacc.target_data = target_data = NULL;

	  /* Drop every mapping still recorded for this device.  */
	  struct gomp_memory_mapping *mem_map = &walk->dev->mem_map;
	  gomp_mutex_lock (&mem_map->lock);
	  gomp_free_memmap (mem_map);
	  gomp_mutex_unlock (&mem_map->lock);

	  walk->dev = NULL;
	}
    }

  gomp_mutex_unlock (&goacc_thread_lock);

  gomp_fini_device (base_dev);
  base_dev = NULL;
}
void acc_wait_all_async (int async) { if (async < acc_async_sync) gomp_fatal ("invalid async argument: %d", async); struct goacc_thread *thr = goacc_thread (); if (!thr || !thr->dev) gomp_fatal ("no device active"); thr->dev->openacc.async_wait_all_async_func (async); }
/* Undo the mapping(s) created for an OpenACC pointer at host address H.
   MAPNUM is the number of mappings originally created (1, or more when
   a pointer/pset entry accompanied the data); FORCE_COPYFROM forces a
   device-to-host copy on unmap; ASYNC selects synchronous or
   asynchronous cleanup.  */
void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  /* Reference count at which this becomes the last user: one extra ref
     for a lone mapping, two when a pointer entry is involved.  */
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);
  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory.  Note that T is reused as the loop
	 cursor; after the break it again equals N->tgt.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	{
	  if (n->tgt == t)
	    {
	      if (tp)
		tp->prev = t->prev;
	      else
		acc_dev->openacc.data_environ = t->prev;
	      break;
	    }
	}
    }

  /* NOTE(review): if the unlink loop ran and N->tgt was not found on
     data_environ, T is NULL here and the accesses below crash --
     presumably the descriptor is always on the list; confirm.  */
  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    t->device_descr->openacc.register_async_cleanup_func (t, async);

  gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
}
/* Copy S bytes between host address H and its mapped device
   counterpart.  IS_DEV nonzero copies host->device, otherwise
   device->host.  H must lie within an existing mapping.  */
static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  /* Include H's offset within the mapped block, matching the sibling
     implementation of this function: the original used
     tgt_start + tgt_offset alone, which is only correct when H is
     exactly the start of the mapping -- for an interior pointer it
     transferred the wrong device region.  */
  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  gomp_mutex_unlock (&acc_dev->lock);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
}
/* Remove NODE (whose links are interpreted per TYPE) from priority
   queue HEAD, freeing the per-priority list when NODE was its last
   entry.  */
void
priority_tree_remove (enum priority_queue_type type,
		      struct priority_queue *head,
		      struct priority_node *node)
{
  /* ?? The only reason this function is not inlined is because we need
     to find the priority within gomp_task (which has not been
     completely defined in the header file).  If the lack of inlining is
     a concern, we could pass the priority number as a parameter, or we
     could move this to libgomp.h.  */
  int priority = priority_node_to_task (type, node)->priority;

  /* ?? We could avoid this lookup by keeping a pointer to the key in
     the priority_node.  */
  struct priority_list *list
    = priority_queue_lookup_priority (head, priority);
#if _LIBGOMP_CHECKING_
  if (!list)
    gomp_fatal ("Unable to find priority %d", priority);
#endif
  /* If NODE was the last in its priority, clean up the priority.  */
  if (priority_list_remove (list, node, MEMMODEL_RELAXED))
    {
      prio_splay_tree_remove (&head->t, (prio_splay_tree_key) list);
      list->tasks = NULL;
#if _LIBGOMP_CHECKING_
      /* Poison the list so a stale reference trips in checking
	 builds.  */
      memset (list, 0xaf, sizeof (*list));
#endif
      free (list);
    }
}
/* acc_set_device_type: make D the current device type, initializing its
   device if necessary, and attach the calling host thread to it.  */
void
acc_set_device_type (acc_device_t d)
{
  struct gomp_device_descr *base_dev, *acc_dev;
  struct goacc_thread *thr = goacc_thread ();

  /* acc_device_lock guards the device tables; the per-device lock is
     taken nested inside it for initialization.  */
  gomp_mutex_lock (&acc_device_lock);

  if (!cached_base_dev)
    gomp_init_targets_once ();

  cached_base_dev = base_dev = resolve_device (d);
  acc_dev = &base_dev[goacc_device_num];

  gomp_mutex_lock (&acc_dev->lock);
  if (!acc_dev->is_initialized)
    gomp_init_device (acc_dev);
  gomp_mutex_unlock (&acc_dev->lock);

  gomp_mutex_unlock (&acc_device_lock);

  /* We're changing device type: invalidate the current thread's dev
     and base_dev pointers.  */
  if (thr && thr->base_dev != base_dev)
    {
      thr->base_dev = thr->dev = NULL;
      /* Switching device types inside an "acc data" region would leave
	 mappings stranded on the old device.  */
      if (thr->mapped_data)
	gomp_fatal ("acc_set_device_type in 'acc data' region");
    }

  goacc_attach_host_thread_to_device (-1);
}
/* Copy S bytes between host address H and its mapped device
   counterpart.  IS_DEV nonzero copies host->device, otherwise
   device->host.  No-op on shared-memory devices.  */
static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Host and device share memory; nothing to copy.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  /* Device address for H, including its offset within the mapped
     block.  */
  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  /* The transfer runs with the device lock still held, so the mapping
     cannot be removed underneath us.  */
  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);
}
/* Resolve and initialize device type D, returning the base device
   descriptor.  Aborts when the device ordinal is out of range or the
   device is already active.  */
static struct gomp_device_descr *
acc_init_1 (acc_device_t d)
{
  struct gomp_device_descr *base_dev = resolve_device (d, true);
  int ndevs = base_dev->get_num_devices_func ();

  if (ndevs <= 0 || goacc_device_num >= ndevs)
    acc_dev_num_out_of_range (d, goacc_device_num, ndevs);

  struct gomp_device_descr *acc_dev = &base_dev[goacc_device_num];

  gomp_mutex_lock (&acc_dev->lock);
  if (acc_dev->is_initialized)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("device already active");
    }
  gomp_init_device (acc_dev);
  gomp_mutex_unlock (&acc_dev->lock);

  return base_dev;
}
/* Implement "delete" (F without FLAG_COPYOUT) and "copyout" (F with
   FLAG_COPYOUT) for host block [H,+S].  LIBFNNAME names the
   user-visible entry point for diagnostics.  The block must match an
   existing mapping exactly.  */
static void
delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Host and device share memory; nothing to copy or free.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  /* Device address for H, including its offset within the mapped
     block.  */
  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  /* [H,+S] must be exactly the mapped block, not a sub-range.  */
  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
		  (void *) n->host_start, (int) host_size,
		  (void *) h, (int) s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (f & FLAG_COPYOUT)
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  acc_unmap_data (h);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", libfnname);
}
/* Allocate SIZE bytes of zero-filled memory, terminating the program
   through gomp_fatal when the underlying allocation fails.  */
void *
gomp_malloc_cleared (size_t size)
{
  void *p;

  p = calloc (1, size);
  if (p == NULL)
    gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
  return p;
}
/* Resize the allocation at OLD to SIZE bytes, terminating the program
   through gomp_fatal when the underlying reallocation fails.  */
void *
gomp_realloc (void *old, size_t size)
{
  void *p;

  p = realloc (old, size);
  if (p == NULL)
    gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
  return p;
}
/* Handle the mappings of an OpenACC "declare" directive: for each of
   the MAPNUM entries in HOSTADDRS/SIZES/KINDS, dispatch to
   GOACC_enter_exit_data as appropriate for its map kind.  */
void
GOACC_declare (int device, size_t mapnum,
	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  size_t i;

  /* Index with size_t: the original used int, giving a signed/unsigned
     comparison against MAPNUM.  */
  for (i = 0; i < mapnum; i++)
    {
      unsigned char kind = kinds[i] & 0xff;

      /* Pointer and pointer-set entries are handled as part of their
	 parent mapping; this also made the original switch's
	 GOMP_MAP_POINTER case unreachable, so it is dropped below.  */
      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
	continue;

      switch (kind)
	{
	case GOMP_MAP_FORCE_ALLOC:
	case GOMP_MAP_FORCE_DEALLOC:
	case GOMP_MAP_FORCE_FROM:
	case GOMP_MAP_FORCE_TO:
	  GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
				 &kinds[i], 0, 0);
	  break;

	case GOMP_MAP_FORCE_DEVICEPTR:
	  break;

	case GOMP_MAP_ALLOC:
	  if (!acc_is_present (hostaddrs[i], sizes[i]))
	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
				   &kinds[i], 0, 0);
	  break;

	case GOMP_MAP_TO:
	  GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
				 &kinds[i], 0, 0);
	  break;

	case GOMP_MAP_FROM:
	  /* Defer the copy-back to region end by forcing it now.  */
	  kinds[i] = GOMP_MAP_FORCE_FROM;
	  GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
				 &kinds[i], 0, 0);
	  break;

	case GOMP_MAP_FORCE_PRESENT:
	  if (!acc_is_present (hostaddrs[i], sizes[i]))
	    gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
			(unsigned long) sizes[i]);
	  break;

	default:
	  assert (0);
	  break;
	}
    }
}
/* Handle an OpenACC "update" directive: copy each mapping to
   (FORCE_TO) or from (FORCE_FROM) the device.  ASYNC selects the async
   queue; NUM_WAITS varargs name queues to wait on first.  */
void
GOACC_update (int device, const void *offload_table, size_t mapnum,
	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
	      int async, int num_waits, ...)
{
  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
  size_t i;

  select_acc_device (device);

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Nothing to transfer on shared-memory devices or when falling back
     to host execution.  */
  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
      || host_fallback)
    return;

  /* Wait on the requested async queues before doing the updates.  */
  if (num_waits > 0)
    {
      va_list ap;

      va_start (ap, num_waits);
      goacc_wait (async, num_waits, ap);
      va_end (ap);
    }

  acc_dev->openacc.async_set_async_func (async);

  for (i = 0; i < mapnum; ++i)
    {
      unsigned char kind = kinds[i] & 0xff;

      switch (kind)
	{
	case GOMP_MAP_POINTER:
	case GOMP_MAP_TO_PSET:
	  /* Handled as part of their parent mapping.  */
	  break;

	case GOMP_MAP_FORCE_TO:
	  acc_update_device (hostaddrs[i], sizes[i]);
	  break;

	case GOMP_MAP_FORCE_FROM:
	  acc_update_self (hostaddrs[i], sizes[i]);
	  break;

	default:
	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
	  break;
	}
    }

  /* Restore the synchronous default queue.  */
  acc_dev->openacc.async_set_async_func (acc_async_sync);
}
void acc_wait_all (void) { struct goacc_thread *thr = goacc_thread (); if (!thr || !thr->dev) gomp_fatal ("no device active"); thr->dev->openacc.async_wait_all_func (); }
/* Return the default OpenACC launch dimension for axis I.  Aborts via
   gomp_fatal when I is out of range.  */
int
GOMP_PLUGIN_acc_default_dim (unsigned int i)
{
  if (i >= GOMP_DIM_MAX)
    {
      /* Use %u: I is unsigned, and the original's %d mismatched the
	 argument type (undefined behavior per the C standard).  */
      gomp_fatal ("invalid dimension argument: %u", i);
      return -1;
    }
  return goacc_default_dims[i];
}
void acc_wait_async (int async1, int async2) { struct goacc_thread *thr = goacc_thread (); if (!thr || !thr->dev) gomp_fatal ("no device active"); thr->dev->openacc.async_wait_async_func (async1, async2); }
/* Implement "delete" (F without FLAG_COPYOUT) and "copyout" (F with
   FLAG_COPYOUT) for host block [H,+S].  The block must match an
   existing mapping exactly.  */
static void
delete_copyout (unsigned f, void *h, size_t s)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  /* Device address of the mapped block.  No interior offset is added;
     the exact-match check below rejects interior pointers anyway.  */
  d = (void *) (n->tgt->tgt_start + n->tgt_offset);

  host_size = n->host_end - n->host_start;

  /* [H,+S] must be exactly the mapped block, not a sub-range.  */
  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
		  (void *) n->host_start, (int) host_size,
		  (void *) h, (int) s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (f & FLAG_COPYOUT)
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  acc_unmap_data (h);

  /* NOTE(review): free_func's result is ignored here, while a sibling
     delete_copyout variant treats a zero return as fatal -- confirm
     this revision's device hook cannot fail.  */
  acc_dev->free_func (acc_dev->target_id, d);
}
/* Resolve and initialize device type D, returning its descriptor.
   Aborts when no such device is usable or it is already active.  */
static struct gomp_device_descr *
acc_init_1 (acc_device_t d)
{
  struct gomp_device_descr *dev = resolve_device (d);

  if (dev == NULL || dev->get_num_devices_func () <= 0)
    gomp_fatal ("device %u not supported", (unsigned)d);

  if (dev->is_initialized)
    gomp_fatal ("device already active");

  /* Remember which device type we were initialized as, so shutdown
     etc. can be checked against it.  */
  init_key = d;
  gomp_init_device (dev);

  return dev;
}
/* Parse one pool specification of the form "[:]<count>[$<priority>]@<scheduler>"
   starting at ENV.  Stores the parsed values into *COUNT, *PRIORITY and
   *SCHEDULER and returns a pointer just past the scheduler name (at the
   terminating '\0' or the next ':').  Aborts via gomp_fatal on
   malformed input.  */
static char *
parse_thread_pools (char *env, unsigned long *count, unsigned long *priority,
		    unsigned long *scheduler)
{
  size_t len;
  int i;

  /* Skip the ':' separating this spec from the previous one.  */
  if (*env == ':')
    ++env;

  errno = 0;
  *count = strtoul (env, &env, 10);
  if (errno != 0)
    gomp_fatal ("Invalid thread pool count");

  /* Optional "$<priority>" part; absent, store -1 (wraps to ULONG_MAX
     in the unsigned long) as the "use default" marker.  */
  if (*env == '$')
    {
      ++env;
      errno = 0;
      *priority = strtoul (env, &env, 10);
      if (errno != 0)
	gomp_fatal ("Invalid thread pool priority");
    }
  else
    *priority = -1;

  /* Mandatory "@<scheduler>" part.  */
  if (*env != '@')
    gomp_fatal ("Invalid thread pool scheduler prefix");
  ++env;

  /* The scheduler name extends to end-of-string or the next ':'.  */
  len = 0;
  while (env[len] != '\0' && env[len] != ':')
    ++len;

  i = _Sched_Name_to_index (env, len);
  if (i < 0)
    gomp_fatal ("Invalid thread pool scheduler");
  *scheduler = i;

  env += len;
  return env;
}
/* Validate invariants on PARENT's children queue: TASK must claim
   PARENT as its parent and be present in PARENT's circular child list;
   no WAITING task may follow a TIED one; and among WAITING tasks the
   parent_depends_on entries must precede the rest.  */
static void
verify_children_queue (struct gomp_task *task, struct gomp_task *parent)
{
  if (task->parent != parent)
    gomp_fatal ("verify_children_queue: incompatible parents");
  /* It's OK, Annie was an orphan and she turned out all right.  */
  if (!parent)
    return;

  bool seen_tied = false;
  bool seen_plain_waiting = false;
  bool found = false;
  /* NOTE(review): assumes PARENT->children is non-NULL (TASK should be
     on it) -- confirm callers guarantee this, otherwise the first
     t->kind access dereferences NULL.  */
  struct gomp_task *t = parent->children;
  while (1)
    {
      if (t == task)
	found = true;
      /* Queue ordering invariant: WAITING tasks precede TIED ones.  */
      if (seen_tied && t->kind == GOMP_TASK_WAITING)
	gomp_fatal ("verify_children_queue: WAITING task after TIED");
      if (t->kind == GOMP_TASK_TIED)
	seen_tied = true;
      else if (t->kind == GOMP_TASK_WAITING)
	{
	  /* Among WAITING tasks, parent_depends_on entries come
	     first.  */
	  if (t->parent_depends_on)
	    {
	      if (seen_plain_waiting)
		gomp_fatal ("verify_children_queue: parent_depends_on after "
			    "!parent_depends_on");
	    }
	  else
	    seen_plain_waiting = true;
	}
      t = t->next_child;
      /* Circular list: stop once we wrap around to the head.  */
      if (t == parent->children)
	break;
    }
  if (!found)
    gomp_fatal ("verify_children_queue: child not found in parent queue");
}
/* Return a human-readable name for device type TYPE; aborts through
   gomp_fatal for types without a known name.  */
static const char *
name_of_acc_device_t (enum acc_device_t type)
{
  if (type == acc_device_none)
    return "none";
  if (type == acc_device_default)
    return "default";
  if (type == acc_device_host)
    return "host";
  if (type == acc_device_not_host)
    return "not_host";
  if (type == acc_device_nvidia)
    return "nvidia";
  gomp_fatal ("unknown device type %u", (unsigned) type);
}
/* acc_get_device_num: return the device number in use for device type
   D -- the current thread's active device when it matches D, otherwise
   the global default device number.  */
int
acc_get_device_num (acc_device_t d)
{
  struct goacc_thread *thr = goacc_thread ();

  if (d >= _ACC_device_hwm)
    gomp_fatal ("device %u out of range", (unsigned)d);

  if (!cached_base_dev)
    gomp_init_targets_once ();

  const struct gomp_device_descr *resolved;
  gomp_mutex_lock (&acc_device_lock);
  resolved = resolve_device (d);
  gomp_mutex_unlock (&acc_device_lock);

  if (resolved == NULL)
    gomp_fatal ("device %s not supported", name_of_acc_device_t (d));

  if (thr != NULL && thr->base_dev == resolved && thr->dev != NULL)
    return thr->dev->target_id;

  return goacc_device_num;
}
/* acc_set_device_num: select device ordinal N for device type D.  A D
   of zero applies N to every initialized device type; otherwise the
   named type is (lazily) initialized and opened on device N.  */
void
acc_set_device_num (int n, acc_device_t d)
{
  const struct gomp_device_descr *dev;
  int num_devices;

  if (!base_dev)
    gomp_init_targets_once ();

  if ((int) d == 0)
    {
      int i;

      /* A device setting of zero sets all device types on the system to
	 use the Nth instance of that device type.  Only attempt it for
	 initialized devices though.  */
      for (i = acc_device_not_host + 1; i < _ACC_device_hwm; i++)
	{
	  /* Resolve the iterated device type I, not D: the original
	     passed D (== 0) on every iteration, so the loop never
	     visited any actual device type.  */
	  dev = resolve_device ((acc_device_t) i);
	  if (dev && dev->is_initialized)
	    dev->openacc.set_device_num_func (n);
	}

      /* ...and for future calls to acc_init/acc_set_device_type, etc. */
      goacc_device_num = n;
    }
  else
    {
      struct goacc_thread *thr = goacc_thread ();

      gomp_mutex_lock (&acc_device_lock);

      base_dev = lazy_init (d);

      num_devices = base_dev->get_num_devices_func ();

      /* N is an int, so use %d: the original's %u mismatched the
	 argument type.  */
      if (n >= num_devices)
	gomp_fatal ("device %d out of range", n);

      /* If we're changing the device number, de-associate this thread
	 with the device (but don't close the device, since it may be in
	 use by other threads).  */
      if (thr && thr->dev && n != thr->dev->target_id)
	thr->dev = NULL;

      lazy_open (n);

      gomp_mutex_unlock (&acc_device_lock);
    }
}