mca_spml_mkey_t *mca_spml_yoda_register(void* addr, size_t size, uint64_t shmid, int *count) { int i; mca_btl_base_descriptor_t* des = NULL; const opal_datatype_t *datatype = &opal_datatype_wchar; opal_convertor_t convertor; mca_spml_mkey_t *mkeys; struct yoda_btl *ybtl; oshmem_proc_t *proc_self; mca_spml_yoda_context_t* yoda_context; struct iovec iov; uint32_t iov_count = 1; SPML_VERBOSE(10, "address %p len %llu", addr, (unsigned long long)size); *count = 0; /* make sure everything is initialized to 0 */ mkeys = (mca_spml_mkey_t *) calloc(1, mca_spml_yoda.n_btls * sizeof(*mkeys)); if (!mkeys) { return NULL ; } proc_self = oshmem_proc_group_find(oshmem_group_all, oshmem_my_proc_id()); /* create convertor */ OBJ_CONSTRUCT(&convertor, opal_convertor_t); mca_bml.bml_register( MCA_SPML_YODA_PUT, mca_yoda_put_callback, NULL ); mca_bml.bml_register( MCA_SPML_YODA_GET, mca_yoda_get_callback, NULL ); mca_bml.bml_register( MCA_SPML_YODA_GET_RESPONSE, mca_yoda_get_response_callback, NULL ); /* Register proc memory in every rdma BTL. */ for (i = 0; i < mca_spml_yoda.n_btls; i++) { ybtl = &mca_spml_yoda.btl_type_map[i]; mkeys[i].va_base = addr; if (!ybtl->use_cnt) { SPML_VERBOSE(10, "%s: present but not in use. 
SKIP registration", btl_type2str(ybtl->btl_type)); continue; } /* If we have shared memory just save its id*/ if (YODA_BTL_SM == ybtl->btl_type && MEMHEAP_SHM_INVALID != (int) MEMHEAP_SHM_GET_ID(shmid)) { mkeys[i].u.key = shmid; mkeys[i].va_base = 0; continue; } yoda_context = calloc(1, sizeof(*yoda_context)); mkeys[i].spml_context = yoda_context; yoda_context->registration = NULL; if (NULL != ybtl->btl->btl_prepare_src) { /* initialize convertor for source descriptor*/ opal_convertor_copy_and_prepare_for_recv(proc_self->proc_convertor, datatype, size, addr, 0, &convertor); if (NULL != ybtl->btl->btl_mpool && NULL != ybtl->btl->btl_mpool->mpool_register) { iov.iov_len = size; iov.iov_base = NULL; opal_convertor_pack(&convertor, &iov, &iov_count, &size); ybtl->btl->btl_mpool->mpool_register(ybtl->btl->btl_mpool, iov.iov_base, size, 0, &yoda_context->registration); } /* initialize convertor for source descriptor*/ opal_convertor_copy_and_prepare_for_recv(proc_self->proc_convertor, datatype, size, addr, 0, &convertor); /* register source memory */ des = ybtl->btl->btl_prepare_src(ybtl->btl, 0, yoda_context->registration, &convertor, MCA_BTL_NO_ORDER, 0, &size, 0); if (NULL == des) { SPML_ERROR("%s: failed to register source memory. ", btl_type2str(ybtl->btl_type)); } yoda_context->btl_src_descriptor = des; mkeys[i].u.data = des->des_src; mkeys[i].len = ybtl->btl->btl_seg_size; } SPML_VERBOSE(5, "rank %d btl %s address 0x%p len %llu shmid 0x%X|0x%X", oshmem_proc_local_proc->proc_name.vpid, btl_type2str(ybtl->btl_type), mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), MEMHEAP_SHM_GET_ID(shmid)); } OBJ_DESTRUCT(&convertor); *count = mca_spml_yoda.n_btls; return mkeys; }
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
/*
 * Map a segment exported as a shared InfiniBand memory region.
 *
 * Requests a distinct target address on every call (a static call counter
 * scales the interleave factor) and stores the resulting address in
 * mkey->va_base, or (void*)-1 when ibv_reg_shared_mr() fails.
 */
static void memheap_attach_ibv_segment(mca_spml_mkey_t *mkey)
{
    static int mr_count;
    openib_device_t *dev;
    struct ibv_mr *mr;
    void *want_addr;
    int access = IBV_ACCESS_LOCAL_WRITE |
                 IBV_ACCESS_REMOTE_WRITE |
                 IBV_ACCESS_REMOTE_READ |
                 IBV_ACCESS_NO_RDMA;

    dev = (openib_device_t *)memheap_map->mem_segs[HEAP_SEG_INDEX].context;
    assert(dev);

    /* workaround mtt problem - request aligned addresses */
    ++mr_count;
    want_addr = (void *)(mca_memheap_base_start_address + mca_memheap_base_mr_interleave_factor*1024ULL*1024ULL*1024ULL*mr_count);

    mr = ibv_reg_shared_mr(MEMHEAP_SHM_GET_ID(mkey->key), dev->ib_pd, want_addr, access);
    if (NULL == mr) {
        mkey->va_base = (void*)-1;
        MEMHEAP_ERROR("error to ibv_reg_shared_mr() errno says %d: %s",
                      errno, strerror(errno));
        return;
    }

    if (mr->addr != want_addr) {
        MEMHEAP_WARN("Failed to map shared region to address %p got addr %p. Try to increase 'memheap_mr_interleave_factor' from %d", want_addr, mr->addr, mca_memheap_base_mr_interleave_factor);
    }

    opal_value_array_append_item(&dev->ib_mr_array, &mr);
    mkey->va_base = mr->addr;
}
#endif /* MPAGE_ENABLE */

/*
 * Attach the segment described by mkey when it was published by id instead
 * of by address.
 *
 * Such keys are recognised by:
 *   - key holding (type|shmid) with a valid shmid;
 *   - va_base being 0.
 * Any other key is left untouched. On success va_base is set to the local
 * mapping; an unknown segment type or a mapping result of (void*)-1 ends in
 * oshmem_shmem_abort(-1).
 *
 * NOTE(review): when MPAGE_ENABLE is not enabled, a key of type
 * MAP_SEGMENT_ALLOC_IBV falls through with va_base still 0 and no error is
 * reported - confirm this cannot happen in such builds.
 */
static void memheap_attach_segment(mca_spml_mkey_t *mkey, int tr_id)
{
    /* Nothing to do for keys that already carry an address ... */
    if (mkey->va_base) {
        return;
    }
    /* ... or that do not carry a usable shared-memory id. */
    if ((int) MEMHEAP_SHM_GET_ID(mkey->key) == MEMHEAP_SHM_INVALID) {
        return;
    }

    MEMHEAP_VERBOSE(5,
                    "shared memory usage tr_id: %d key %llx base_va %p shmid 0x%X|0x%X",
                    tr_id,
                    (unsigned long long)mkey->key,
                    mkey->va_base,
                    MEMHEAP_SHM_GET_TYPE(mkey->key),
                    MEMHEAP_SHM_GET_ID(mkey->key));

    if (MEMHEAP_SHM_GET_TYPE(mkey->key) == MAP_SEGMENT_ALLOC_SHM) {
        /* System V segment: let the kernel pick the address. */
        mkey->va_base = shmat(MEMHEAP_SHM_GET_ID(mkey->key), 0, 0);
    } else if (MEMHEAP_SHM_GET_TYPE(mkey->key) == MAP_SEGMENT_ALLOC_IBV) {
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        memheap_attach_ibv_segment(mkey);
#endif /* MPAGE_ENABLE */
    } else {
        MEMHEAP_ERROR("tr_id: %d key %llx attach failed: incorrect shmid 0x%X|0x%X",
                      tr_id,
                      (unsigned long long)mkey->key,
                      MEMHEAP_SHM_GET_TYPE(mkey->key),
                      MEMHEAP_SHM_GET_ID(mkey->key));
        oshmem_shmem_abort(-1);
    }

    /* Both attach paths signal failure with (void*)-1. */
    if ((void *) mkey->va_base == (void *) -1) {
        MEMHEAP_ERROR("tr_id: %d key %llx attach failed: errno = %d",
                      tr_id,
                      (unsigned long long)mkey->key,
                      errno);
        oshmem_shmem_abort(-1);
    }
}