/**
 * Query the MD resources exposed by the GDR copy component.
 *
 * The single GDR copy MD resource is reported only when cudaGetDeviceCount()
 * succeeds with a non-zero device count and gdr_open() returns a non-NULL
 * handle. The handle is used purely as a probe and is closed right away.
 * In every other case an empty resource list is returned.
 *
 * @param [out] resources_p      Filled with the resource array, or NULL.
 * @param [out] num_resources_p  Filled with the number of resources.
 *
 * @return UCS_OK when no resource is reported, otherwise the status returned
 *         by uct_single_md_resource().
 */
static ucs_status_t
uct_gdr_copy_query_md_resources(uct_md_resource_desc_t **resources_p,
                                unsigned *num_resources_p)
{
    int device_count;
    gdr_t gdr_handle;
    cudaError_t cuda_status;

    cuda_status = cudaGetDeviceCount(&device_count);
    if ((cuda_status == cudaSuccess) && (device_count != 0)) {
        gdr_handle = gdr_open();
        if (gdr_handle != NULL) {
            /* The probe succeeded; the handle itself is not needed here */
            gdr_close(gdr_handle);
            return uct_single_md_resource(&uct_gdr_copy_md_component,
                                          resources_p, num_resources_p);
        }

        ucs_debug("could not open gdr copy. disabling gdr copy resource");
    } else {
        ucs_debug("not found cuda devices");
    }

    /* Nothing usable was found: report an empty resource list */
    *resources_p     = NULL;
    *num_resources_p = 0;
    return UCS_OK;
}
/*
 * For Cray devices we have only one MD.
 *
 * The MD resource is reported only when the PMI_GNI_PTAG environment variable
 * is set; otherwise an empty resource list is returned with UCS_OK.
 */
static ucs_status_t
uct_ugni_query_md_resources(uct_md_resource_desc_t **resources_p,
                            unsigned *num_resources_p)
{
    const char *ptag_env = getenv("PMI_GNI_PTAG");

    if (ptag_env == NULL) {
        /* Variable is not set: report no resources */
        *resources_p     = NULL;
        *num_resources_p = 0;
        return UCS_OK;
    }

    return uct_single_md_resource(&uct_ugni_md_component, resources_p,
                                  num_resources_p);
}
/**
 * Query the MD resources exposed by the KNEM component.
 *
 * Probes the KNEM driver by opening /dev/knem and issuing KNEM_CMD_GET_INFO.
 * The single KNEM MD resource is reported only if the device can be opened,
 * the ioctl succeeds, and the ABI version reported by the driver equals
 * KNEM_ABI_VERSION. In all other cases an empty resource list is returned.
 * The probe file descriptor is closed on every path.
 *
 * @param [out] resources_p      Filled with the resource array, or NULL.
 * @param [out] num_resources_p  Filled with the number of resources.
 *
 * @return UCS_OK when no resource is reported (probe failures are logged and
 *         not treated as errors), otherwise the status returned by
 *         uct_single_md_resource().
 */
static ucs_status_t
uct_knem_query_md_resources(uct_md_resource_desc_t **resources_p,
                            unsigned *num_resources_p)
{
    struct knem_cmd_info info;
    int fd;
    int rc;

    memset(&info, 0, sizeof(info));

    fd = open("/dev/knem", O_RDWR);
    if (fd < 0) {
        ucs_debug("Could not open the KNEM device file at /dev/knem: %m. Disabling knem resource");
        goto out_no_resources;
    }

    rc = ioctl(fd, KNEM_CMD_GET_INFO, &info);
    if (rc < 0) {
        /* Log before close(): %m is formatted from errno, and close() may
         * overwrite the value left by the failed ioctl() */
        ucs_debug("KNEM get info failed. not using knem, err = %d %m", rc);
        goto out_close;
    }

    if (KNEM_ABI_VERSION != info.abi) {
        ucs_error("KNEM ABI mismatch: KNEM_ABI_VERSION: %d, Driver binary interface version: %d",
                  KNEM_ABI_VERSION, info.abi);
        goto out_close;
    }

    /* We have to close it since it is not clear
     * if it will be selected in future */
    close(fd);
    return uct_single_md_resource(&uct_knem_md_component, resources_p,
                                  num_resources_p);

out_close:
    close(fd);
out_no_resources:
    *resources_p     = NULL;
    *num_resources_p = 0;
    return UCS_OK;
}
/**
 * Query the MD resources exposed by the self component.
 *
 * Unconditionally forwards to uct_single_md_resource() with uct_self_md.
 *
 * @param [out] resources_p      Passed through to uct_single_md_resource().
 * @param [out] num_resources_p  Passed through to uct_single_md_resource().
 *
 * @return The status returned by uct_single_md_resource().
 */
static ucs_status_t
uct_self_query_md_resources(uct_md_resource_desc_t **resources_p,
                            unsigned *num_resources_p)
{
    ucs_status_t status;

    status = uct_single_md_resource(&uct_self_md, resources_p,
                                    num_resources_p);
    return status;
}