bool GOMP_OFFLOAD_unload_image (int ord, unsigned version, const void *target_data) { struct ptx_image_data *image, **prev_p; struct ptx_device *dev = ptx_devices[ord]; if (GOMP_VERSION_DEV (version) > GOMP_VERSION_NVIDIA_PTX) { GOMP_PLUGIN_error ("Offload data incompatible with PTX plugin" " (expected %u, received %u)", GOMP_VERSION_NVIDIA_PTX, GOMP_VERSION_DEV (version)); return false; } bool ret = true; pthread_mutex_lock (&dev->image_lock); for (prev_p = &dev->images; (image = *prev_p) != 0; prev_p = &image->next) if (image->target_data == target_data) { *prev_p = image->next; if (cuModuleUnload (image->module) != CUDA_SUCCESS) ret = false; free (image->fns); free (image); break; } pthread_mutex_unlock (&dev->image_lock); return ret; }
/* Unload the offload image identified by TARGET_DATA from device ORD
   (older, void-returning variant: failures are not reported to the caller).

   NOTE(review): this file also contains a bool-returning definition of the
   same function; two definitions with conflicting return types cannot
   coexist in one translation unit, so presumably only one of the two is
   meant to be compiled — TODO confirm which one the build expects and drop
   the other.  */
void
GOMP_OFFLOAD_unload_image (int ord, unsigned version, const void *target_data)
{
  struct ptx_image_data *image, **prev_p;
  struct ptx_device *dev = ptx_devices[ord];

  /* An image built for a newer plugin ABI is silently ignored here; the
     bool-returning variant reports this case via GOMP_PLUGIN_error.  */
  if (GOMP_VERSION_DEV (version) > GOMP_VERSION_NVIDIA_PTX)
    return;

  pthread_mutex_lock (&dev->image_lock);
  for (prev_p = &dev->images; (image = *prev_p) != 0; prev_p = &image->next)
    if (image->target_data == target_data)
      {
	/* Unlink IMAGE from the device's image list before tearing it
	   down.  */
	*prev_p = image->next;
	/* NOTE(review): the cuModuleUnload result is discarded, so a
	   failed unload goes unnoticed by the caller.  */
	cuModuleUnload (image->module);
	free (image->fns);
	free (image);
	break;
      }
  pthread_mutex_unlock (&dev->image_lock);
}
/* Load the offload image described by TARGET_DATA onto device ORD.  Links
   the embedded PTX objects into a CUDA module, registers the resulting
   image on the device's image list, and fills *TARGET_TABLE with one
   addr_pair per kernel function followed by one per global variable.
   Returns the total number of table entries (fn_entries + var_entries).
   Any failure is fatal via GOMP_PLUGIN_fatal — this function does not
   return an error to the caller.  */
int
GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
			 struct addr_pair **target_table)
{
  CUmodule module;
  const char *const *var_names;
  const struct targ_fn_launch *fn_descs;
  unsigned int fn_entries, var_entries, i, j;
  CUresult r;
  struct targ_fn_descriptor *targ_fns;
  struct addr_pair *targ_tbl;
  const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
  struct ptx_image_data *new_image;
  struct ptx_device *dev;

  /* An image emitted for a newer plugin ABI cannot be interpreted.  */
  if (GOMP_VERSION_DEV (version) > GOMP_VERSION_NVIDIA_PTX)
    GOMP_PLUGIN_fatal ("Offload data incompatible with PTX plugin"
		       " (expected %u, received %u)",
		       GOMP_VERSION_NVIDIA_PTX, GOMP_VERSION_DEV (version));

  GOMP_OFFLOAD_init_device (ord);

  dev = ptx_devices[ord];

  nvptx_attach_host_thread_to_device (ord);

  /* JIT-link the image's PTX objects into a loaded CUDA module.  */
  link_ptx (&module, img_header->ptx_objs, img_header->ptx_num);

  /* The mkoffload utility emits a struct of pointers/integers at the
     start of each offload image.  The array of kernel names and the
     functions addresses form a one-to-one correspondence.  */

  var_entries = img_header->var_num;
  var_names = img_header->var_names;
  fn_entries = img_header->fn_num;
  fn_descs = img_header->fn_descs;

  /* One addr_pair per function plus one per variable; the function
     descriptors get their own parallel array.  */
  targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
				 * (fn_entries + var_entries));
  targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor)
				 * fn_entries);

  *target_table = targ_tbl;

  /* Record the image on the device so a later unload_image call can find
     it by its TARGET_DATA pointer.  */
  new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
  new_image->target_data = target_data;
  new_image->module = module;
  new_image->fns = targ_fns;

  pthread_mutex_lock (&dev->image_lock);
  new_image->next = dev->images;
  dev->images = new_image;
  pthread_mutex_unlock (&dev->image_lock);

  /* Functions: each table entry's "address" is the host-side descriptor
     (not the device function itself), with a 1-byte dummy extent.  Note
     TARG_FNS and TARG_TBL advance together in the loop header.  */
  for (i = 0; i < fn_entries; i++, targ_fns++, targ_tbl++)
    {
      CUfunction function;

      r = cuModuleGetFunction (&function, module, fn_descs[i].fn);
      if (r != CUDA_SUCCESS)
	GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r));

      targ_fns->fn = function;
      targ_fns->launch = &fn_descs[i];

      targ_tbl->start = (uintptr_t) targ_fns;
      targ_tbl->end = targ_tbl->start + 1;
    }

  /* Variables: entries carry the actual device address and byte extent
     as reported by cuModuleGetGlobal.  */
  for (j = 0; j < var_entries; j++, targ_tbl++)
    {
      CUdeviceptr var;
      size_t bytes;

      r = cuModuleGetGlobal (&var, &bytes, module, var_names[j]);
      if (r != CUDA_SUCCESS)
	GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r));

      targ_tbl->start = (uintptr_t) var;
      targ_tbl->end = targ_tbl->start + bytes;
    }

  return fn_entries + var_entries;
}