static int _ibv_attach(map_segment_t *s, size_t size) { int rc = OSHMEM_SUCCESS; static openib_device_t memheap_device; openib_device_t *device = &memheap_device; int num_devs = 0; assert(s); memset(device, 0, sizeof(*device)); #ifdef HAVE_IBV_GET_DEVICE_LIST device->ib_devs = ibv_get_device_list(&num_devs); #else #error unsupported ibv_get_device_list in infiniband/verbs.h #endif if (num_devs == 0 || !device->ib_devs) { rc = OSHMEM_ERR_NOT_SUPPORTED; } /* Open device */ if (!rc) { int i = 0; if (num_devs > 1) { if (NULL == mca_memheap_base_param_hca_name) { MEMHEAP_VERBOSE(5, "found %d HCAs, choosing the first", num_devs); } else { MEMHEAP_VERBOSE(5, "found %d HCAs, searching for %s", num_devs, mca_memheap_base_param_hca_name); } } for (i = 0; i < num_devs; i++) { device->ib_dev = device->ib_devs[i]; device->ib_dev_context = ibv_open_device(device->ib_dev); if (NULL == device->ib_dev_context) { MEMHEAP_ERROR("error obtaining device context for %s errno says %d: %s", ibv_get_device_name(device->ib_dev), errno, strerror(errno)); rc = OSHMEM_ERR_RESOURCE_BUSY; } else { if (NULL != mca_memheap_base_param_hca_name) { if (0 == strcmp(mca_memheap_base_param_hca_name,ibv_get_device_name(device->ib_dev))) { MEMHEAP_VERBOSE(5, "mca_memheap_base_param_hca_name = %s, selected %s as %d of %d", mca_memheap_base_param_hca_name, ibv_get_device_name(device->ib_dev), i, num_devs); rc = OSHMEM_SUCCESS; break; } } else { MEMHEAP_VERBOSE(5, "mca_memheap_base_param_hca_name = %s, selected %s as %d of %d", mca_memheap_base_param_hca_name, ibv_get_device_name(device->ib_dev), i, num_devs); rc = OSHMEM_SUCCESS; break; } } } } /* Obtain device attributes */ if (!rc) { if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) { MEMHEAP_ERROR("error obtaining device attributes for %s errno says %d: %s", ibv_get_device_name(device->ib_dev), errno, strerror(errno)); rc = OSHMEM_ERR_RESOURCE_BUSY; } else { MEMHEAP_VERBOSE(5, "ibv device %s", ibv_get_device_name(device->ib_dev)); } } 
/* Allocate the protection domain for the device */ if (!rc) { device->ib_pd = ibv_alloc_pd(device->ib_dev_context); if (NULL == device->ib_pd) { MEMHEAP_ERROR("error allocating protection domain for %s errno says %d: %s", ibv_get_device_name(device->ib_dev), errno, strerror(errno)); rc = OSHMEM_ERR_RESOURCE_BUSY; } } /* Allocate memory */ if (!rc) { void *addr = NULL; struct ibv_mr *ib_mr = NULL; int access_flag = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t); opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *)); #if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0) access_flag |= IBV_ACCESS_ALLOCATE_MR | IBV_ACCESS_SHARED_MR_USER_READ | IBV_ACCESS_SHARED_MR_USER_WRITE; #endif /* MPAGE_ENABLE */ ib_mr = ibv_reg_mr(device->ib_pd, addr, size, access_flag); if (NULL == ib_mr) { MEMHEAP_ERROR("error to ibv_reg_mr() %llu bytes errno says %d: %s", (unsigned long long)size, errno, strerror(errno)); rc = OSHMEM_ERR_OUT_OF_RESOURCE; } else { device->ib_mr_shared = ib_mr; opal_value_array_append_item(&device->ib_mr_array, &ib_mr); } #if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0) if (!rc) { access_flag = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ| IBV_ACCESS_NO_RDMA; addr = (void *)mca_memheap_base_start_address; ib_mr = ibv_reg_shared_mr(device->ib_mr_shared->handle, device->ib_pd, addr, access_flag); if (NULL == ib_mr) { MEMHEAP_ERROR("error to ibv_reg_shared_mr() %llu bytes errno says %d: %s", (unsigned long long)size, errno, strerror(errno)); rc = OSHMEM_ERR_OUT_OF_RESOURCE; } else { opal_value_array_append_item(&device->ib_mr_array, &ib_mr); } } #endif /* MPAGE_ENABLE */ if (!rc) { assert(size == device->ib_mr_shared->length); s->type = MAP_SEGMENT_ALLOC_IBV; s->shmid = device->ib_mr_shared->handle; s->start = ib_mr->addr; s->size = size; s->end = (void*)((uintptr_t)s->start + s->size); s->context = &memheap_device; } } return rc; }
/**
 * Attach the memory described by @p mkey when it has not been mapped yet.
 *
 * Only the "shared segment" special case is handled: the key carries
 * (type|shmid) and va_base is 0.  Depending on the type encoded in the key
 * the segment is attached with shmat() or, when MPAGE_ENABLE is on, with
 * ibv_reg_shared_mr().  On success mkey->va_base holds the attached address.
 * Every failure is logged and ends in oshmem_shmem_abort(-1).
 *
 * @param mkey   memory key to attach; va_base is updated in place
 * @param tr_id  transport id, used in log messages only
 */
static void memheap_attach_segment(mca_spml_mkey_t *mkey, int tr_id)
{
    /* errno of the failing attach call, captured before any logging runs. */
    int attach_errno = 0;

    /* process special case when va was got using shmget(IPC_PRIVATE)
     * this case is notable for:
     * - key is set as (type|shmid);
     * - va_base is set as 0;
     */
    if (mkey->va_base ||
        ((int) MEMHEAP_SHM_GET_ID(mkey->key) == MEMHEAP_SHM_INVALID)) {
        return;
    }

    MEMHEAP_VERBOSE(5,
                    "shared memory usage tr_id: %d key %llx base_va %p shmid 0x%X|0x%X",
                    tr_id,
                    (unsigned long long)mkey->key,
                    mkey->va_base,
                    MEMHEAP_SHM_GET_TYPE(mkey->key),
                    MEMHEAP_SHM_GET_ID(mkey->key));

    if (MEMHEAP_SHM_GET_TYPE(mkey->key) == MAP_SEGMENT_ALLOC_SHM) {
        mkey->va_base = shmat(MEMHEAP_SHM_GET_ID(mkey->key), 0, 0);
        attach_errno = errno;
    } else if (MEMHEAP_SHM_GET_TYPE(mkey->key) == MAP_SEGMENT_ALLOC_IBV) {
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        openib_device_t *device = NULL;
        struct ibv_mr *ib_mr;
        void *addr;
        /* Counts attach attempts so each one requests a distinct address. */
        static int mr_count;
        int access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ |
                          IBV_ACCESS_NO_RDMA;

        device = (openib_device_t *)memheap_map->mem_segs[HEAP_SEG_INDEX].context;
        assert(device);

        /* workaround mtt problem - request aligned addresses */
        ++mr_count;
        addr = (void *)(mca_memheap_base_start_address +
                        mca_memheap_base_mr_interleave_factor*1024ULL*1024ULL*1024ULL*mr_count);
        ib_mr = ibv_reg_shared_mr(MEMHEAP_SHM_GET_ID(mkey->key),
                                  device->ib_pd, addr, access_flag);
        if (NULL == ib_mr) {
            attach_errno = errno;
            /* Same sentinel as a failed shmat(); handled by the check below. */
            mkey->va_base = (void*)-1;
            MEMHEAP_ERROR("error to ibv_reg_shared_mr() errno says %d: %s",
                          attach_errno, strerror(attach_errno));
        } else {
            if (ib_mr->addr != addr) {
                MEMHEAP_WARN("Failed to map shared region to address %p got addr %p. "
                             "Try to increase 'memheap_mr_interleave_factor' from %d",
                             addr, ib_mr->addr, mca_memheap_base_mr_interleave_factor);
            }

            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            mkey->va_base = ib_mr->addr;
        }
#else
        /*
         * Without shared-MR support this key cannot be attached.  Fail loudly
         * rather than returning with va_base still 0.
         */
        MEMHEAP_ERROR("tr_id: %d key %llx attach failed: shared MR support (MPAGE_ENABLE) is not compiled in",
                      tr_id, (unsigned long long)mkey->key);
        oshmem_shmem_abort(-1);
#endif /* MPAGE_ENABLE */
    } else {
        MEMHEAP_ERROR("tr_id: %d key %llx attach failed: incorrect shmid 0x%X|0x%X",
                      tr_id,
                      (unsigned long long)mkey->key,
                      MEMHEAP_SHM_GET_TYPE(mkey->key),
                      MEMHEAP_SHM_GET_ID(mkey->key));
        oshmem_shmem_abort(-1);
    }

    /* (void *)-1 is what shmat() returns on failure and what the IBV path stores. */
    if ((void *) -1 == (void *) mkey->va_base) {
        MEMHEAP_ERROR("tr_id: %d key %llx attach failed: errno = %d",
                      tr_id, (unsigned long long)mkey->key, attach_errno);
        oshmem_shmem_abort(-1);
    }
}