/**
 * Run one mxm_progress() pass on the MXM context held in ompi_mtl_mxm.
 *
 * A status other than MXM_OK or MXM_ERR_NO_PROGRESS is reported through
 * opal_show_help(); it is not propagated to the caller.
 *
 * @return Always 1, whatever status mxm_progress() produced.
 */
int ompi_mtl_mxm_progress(void)
{
    const mxm_error_t status = mxm_progress(ompi_mtl_mxm.mxm_context);

    /* Nothing to report: progress was made, or there was nothing to do. */
    if (MXM_OK == status || MXM_ERR_NO_PROGRESS == status) {
        return 1;
    }

    opal_show_help("help-mtl-mxm.txt",
                   "errors during mxm_progress",
                   true,
                   mxm_error_string(status));
    return 1;
}
/**
 * Run one mxm_progress() pass on the MXM context held in mca_spml_ikrit.
 *
 * A status other than MXM_OK or MXM_ERR_NO_PROGRESS is reported through
 * orte_show_help(); it is not propagated to the caller.
 *
 * @return Always 1, whatever status mxm_progress() produced.
 */
int spml_ikrit_progress(void)
{
    const mxm_error_t status = mxm_progress(mca_spml_ikrit.mxm_context);

    /* Nothing to report: progress was made, or there was nothing to do. */
    if (MXM_OK == status || MXM_ERR_NO_PROGRESS == status) {
        return 1;
    }

    orte_show_help("help-oshmem-spml-ikrit.txt",
                   "errors during mxm_progress",
                   true,
                   mxm_error_string(status));
    return 1;
}
/**
 * Run one mxm_progress() pass on mxm_obj->mxm_context.
 *
 * @return MPI_SUCCESS when mxm_progress() reports MXM_OK or
 *         MXM_ERR_NO_PROGRESS; MPI_ERR_OTHER for any other status.
 */
static int _mxm_poll(void)
{
    const mxm_error_t status = mxm_progress(mxm_obj->mxm_context);

    if (MXM_OK == status || MXM_ERR_NO_PROGRESS == status) {
        return MPI_SUCCESS;
    }

    return MPI_ERR_OTHER;
}
/* simple buffered put implementation. NOT IN USE * Problems: * - slighly worse performance than impl based on non buffered put * - fence complexity is O(n_active_connections) instead of O(n_connections_with_outstanding_puts). * Later is bounded by the network RTT & mxm ack timer. */ int mca_spml_ikrit_put_simple(void* dst_addr, size_t size, void* src_addr, int dst) { void *rva; mxm_send_req_t mxm_req; mxm_wait_t wait; int ptl_id; mxm_mem_key_t *mkey; static int count; ptl_id = get_ptl_id(dst); mkey = mca_spml_ikrit_get_mkey(dst, dst_addr, ptl_id, &rva, &mca_spml_ikrit); SPML_VERBOSE_FASTPATH(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s", dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva); if (NULL == mkey) { memcpy((void *) (unsigned long) rva, src_addr, size); /* call progress as often as we would have with regular put */ if (++count % SPML_IKRIT_PACKETS_PER_SYNC == 0) mxm_progress(mca_spml_ikrit.mxm_context); return OSHMEM_SUCCESS; } SPML_VERBOSE_FASTPATH(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. 
dst_rva=%p, %s", dst, MXM_PTL_RDMA, dst_addr, src_addr, (int)size, (void *)rva); /* fill out request */ mxm_req.base.mq = mca_spml_ikrit.mxm_mq; mxm_req.flags = MXM_REQ_SEND_FLAG_BLOCKING; mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst].mxm_conn; mxm_req.base.data_type = MXM_REQ_DATA_BUFFER; mxm_req.base.data.buffer.ptr = src_addr; mxm_req.base.data.buffer.length = size; mxm_req.base.completed_cb = 0; mxm_req.base.context = 0; mxm_req.opcode = MXM_REQ_OP_PUT; mxm_req.op.mem.remote_vaddr = (intptr_t) rva; mxm_req.base.state = MXM_REQ_NEW; mxm_req.base.error = MXM_OK; mxm_req.op.mem.remote_mkey = mkey; if (mca_spml_ikrit.mxm_peers[dst].need_fence == 0) { opal_list_append(&mca_spml_ikrit.active_peers, &mca_spml_ikrit.mxm_peers[dst].link); mca_spml_ikrit.mxm_peers[dst].need_fence = 1; } SPML_IKRIT_MXM_POST_SEND(mxm_req); wait.req = &mxm_req.base; wait.state = (mxm_req_state_t)(MXM_REQ_SENT | MXM_REQ_COMPLETED); wait.progress_cb = NULL; wait.progress_arg = NULL; mxm_wait(&wait); return OSHMEM_SUCCESS; }
/** * TODO: using put request as handle is not good. */ static inline int mca_spml_ikrit_put_internal(void* dst_addr, size_t size, void* src_addr, int dst, void **handle, int zcopy) { void *rva; mca_spml_ikrit_put_request_t *put_req; int ptl_id; static int count; int need_progress = 0; mxm_mem_key_t *mkey; if (OPAL_UNLIKELY(0 >= size)) { return OSHMEM_SUCCESS; } ptl_id = get_ptl_id(dst); mkey = mca_spml_ikrit_get_mkey(dst, dst_addr, ptl_id, &rva, &mca_spml_ikrit); if (OPAL_UNLIKELY(NULL == mkey)) { memcpy((void *) (unsigned long) rva, src_addr, size); /* call progress as often as we would have with regular put */ if (++count % SPML_IKRIT_PACKETS_PER_SYNC == 0) mxm_progress(mca_spml_ikrit.mxm_context); return OSHMEM_SUCCESS; } SPML_VERBOSE_FASTPATH(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s", dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva); put_req = alloc_put_req(); if (handle) *handle = put_req; /* fill out request */ put_req->mxm_req.base.mq = mca_spml_ikrit.mxm_mq; /* request immediate responce if we are getting low on send buffers. We only get responce from remote on ack timeout. 
* Also request explicit ack once in a while */ put_req->mxm_req.flags = 0; if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER || (int)opal_list_get_size(&mca_spml_ikrit.active_peers) > mca_spml_ikrit.unsync_conn_max || (mca_spml_ikrit.mxm_peers[dst].n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) { need_progress = 1; put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC; } else { put_req->mxm_req.opcode = MXM_REQ_OP_PUT; } if (!zcopy) { if (size < mca_spml_ikrit.put_zcopy_threshold) { put_req->mxm_req.flags |= MXM_REQ_SEND_FLAG_BLOCKING; } else { put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC; } } put_req->mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst].mxm_conn; put_req->mxm_req.base.data_type = MXM_REQ_DATA_BUFFER; put_req->mxm_req.base.data.buffer.ptr = src_addr; put_req->mxm_req.base.data.buffer.length = size; put_req->mxm_req.base.completed_cb = put_completion_cb; put_req->mxm_req.base.context = put_req; put_req->mxm_req.op.mem.remote_vaddr = (intptr_t) rva; put_req->mxm_req.base.state = MXM_REQ_NEW; put_req->pe = dst; put_req->mxm_req.op.mem.remote_mkey = mkey; OPAL_THREAD_ADD_FETCH32(&mca_spml_ikrit.n_active_puts, 1); if (mca_spml_ikrit.mxm_peers[dst].need_fence == 0) { opal_list_append(&mca_spml_ikrit.active_peers, &mca_spml_ikrit.mxm_peers[dst].link); mca_spml_ikrit.mxm_peers[dst].need_fence = 1; } mca_spml_ikrit.mxm_peers[dst].n_active_puts++; SPML_IKRIT_MXM_POST_SEND(put_req->mxm_req); if (need_progress) mxm_progress(mca_spml_ikrit.mxm_context); return OSHMEM_SUCCESS; }
/* simple buffered put implementation. NOT IN USE * Problems: * - slighly worse performance than impl based on non buffered put * - fence complexity is O(n_active_connections) instead of O(n_connections_with_outstanding_puts). * Later is bounded by the network RTT & mxm ack timer. */ int mca_spml_ikrit_put_simple(void* dst_addr, size_t size, void* src_addr, int dst) { void *rva; mxm_send_req_t mxm_req; mxm_wait_t wait; int ptl_id; sshmem_mkey_t *r_mkey; static int count; ptl_id = get_ptl_id(dst); /* Get rkey of remote PE (dst proc) which must be on memheap */ r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, ptl_id, &rva); if (!r_mkey) { SPML_ERROR("pe=%d: %p is not address of shared variable", dst, dst_addr); oshmem_shmem_abort(-1); return OSHMEM_ERROR; } #if SPML_IKRIT_PUT_DEBUG == 1 SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s", dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); #endif if (ptl_id == MXM_PTL_SHM) { if (mca_memheap_base_can_local_copy(r_mkey, dst_addr)) { memcpy((void *) (unsigned long) rva, src_addr, size); /* call progress as often as we would have with regular put */ if (++count % SPML_IKRIT_PACKETS_PER_SYNC == 0) mxm_progress(mca_spml_ikrit.mxm_context); return OSHMEM_SUCCESS; } /* segment not mapped - fallback to rmda */ ptl_id = MXM_PTL_RDMA; r_mkey = mca_memheap_base_get_cached_mkey(dst, //(unsigned long) dst_addr, dst_addr, ptl_id, &rva); if (!r_mkey) { SPML_ERROR("pe=%d: %p is not address of shared variable", dst, dst_addr); oshmem_shmem_abort(-1); return OSHMEM_ERROR; } } #if SPML_IKRIT_PUT_DEBUG == 1 SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. 
dst_rva=%p, %s", dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); #endif /* fill out request */ mxm_req.base.mq = mca_spml_ikrit.mxm_mq; #if MXM_API < MXM_VERSION(2,0) mxm_req.base.flags = MXM_REQ_FLAG_BLOCKING; #else mxm_req.flags = MXM_REQ_SEND_FLAG_BLOCKING; #endif mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn; mxm_req.base.data_type = MXM_REQ_DATA_BUFFER; mxm_req.base.data.buffer.ptr = src_addr; mxm_req.base.data.buffer.length = size; mxm_req.base.completed_cb = 0; mxm_req.base.context = 0; mxm_req.opcode = MXM_REQ_OP_PUT; mxm_req.op.mem.remote_vaddr = (intptr_t) rva; mxm_req.base.state = MXM_REQ_NEW; mxm_req.base.error = MXM_OK; #if MXM_API < MXM_VERSION(2, 0) mxm_req.base.data.buffer.memh = NULL; mxm_req.op.mem.remote_memh = NULL; #else mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey); #endif if (mca_spml_ikrit.mxm_peers[dst]->need_fence == 0) { opal_list_append(&mca_spml_ikrit.active_peers, &mca_spml_ikrit.mxm_peers[dst]->super); mca_spml_ikrit.mxm_peers[dst]->need_fence = 1; } SPML_IKRIT_MXM_POST_SEND(mxm_req); wait.req = &mxm_req.base; wait.state = (mxm_req_state_t)(MXM_REQ_SENT | MXM_REQ_COMPLETED); wait.progress_cb = NULL; wait.progress_arg = NULL; mxm_wait(&wait); return OSHMEM_SUCCESS; }
/** * TODO: using put request as handle is not good. */ static inline int mca_spml_ikrit_put_internal(void* dst_addr, size_t size, void* src_addr, int dst, void **handle, int zcopy) { void *rva; mca_spml_ikrit_put_request_t *put_req; int ptl_id; sshmem_mkey_t *r_mkey; static int count; int need_progress = 0; if (0 >= size) { return OSHMEM_SUCCESS; } ptl_id = get_ptl_id(dst); /* Get rkey of remote PE (dst proc) which must be on memheap */ r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, ptl_id, &rva); if (!r_mkey) { SPML_ERROR("pe=%d: %p is not address of shared variable", dst, dst_addr); oshmem_shmem_abort(-1); return OSHMEM_ERROR; } #if SPML_IKRIT_PUT_DEBUG == 1 SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s", dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); #endif if (ptl_id == MXM_PTL_SHM) { if (mca_memheap_base_can_local_copy(r_mkey, dst_addr)) { memcpy((void *) (unsigned long) rva, src_addr, size); /* call progress as often as we would have with regular put */ if (++count % SPML_IKRIT_PACKETS_PER_SYNC == 0) mxm_progress(mca_spml_ikrit.mxm_context); return OSHMEM_SUCCESS; } /* segment not mapped - fallback to rmda */ ptl_id = MXM_PTL_RDMA; r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, ptl_id, &rva); if (!r_mkey) { SPML_ERROR("pe=%d: %p is not address of shared variable", dst, dst_addr); oshmem_shmem_abort(-1); return OSHMEM_ERROR; } } #if SPML_IKRIT_PUT_DEBUG == 1 SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s", dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey)); #endif put_req = alloc_put_req(); if (NULL == put_req) { SPML_ERROR("out of put requests - aborting"); oshmem_shmem_abort(-1); return OSHMEM_ERROR; } if (handle) *handle = put_req; /* fill out request */ put_req->mxm_req.base.mq = mca_spml_ikrit.mxm_mq; /* request immediate responce if we are getting low on send buffers. 
We only get responce from remote on ack timeout. * Also request explicit ack once in a while */ #if MXM_API < MXM_VERSION(2,0) put_req->mxm_req.opcode = MXM_REQ_OP_PUT; if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER || (mca_spml_ikrit.mxm_peers[dst]->n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) { put_req->mxm_req.base.flags = MXM_REQ_FLAG_SEND_SYNC; need_progress = 1; } else { put_req->mxm_req.base.flags = MXM_REQ_FLAG_SEND_LAZY|MXM_REQ_FLAG_SEND_SYNC; } #else put_req->mxm_req.flags = 0; if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER || (int)opal_list_get_size(&mca_spml_ikrit.active_peers) > mca_spml_ikrit.unsync_conn_max || (mca_spml_ikrit.mxm_peers[dst]->n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) { need_progress = 1; put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC; } else { put_req->mxm_req.opcode = MXM_REQ_OP_PUT; } if (!zcopy) { if (size < mca_spml_ikrit.put_zcopy_threshold) { put_req->mxm_req.flags |= MXM_REQ_SEND_FLAG_BLOCKING; } else { put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC; } } #endif put_req->mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn; put_req->mxm_req.base.data_type = MXM_REQ_DATA_BUFFER; put_req->mxm_req.base.data.buffer.ptr = src_addr; put_req->mxm_req.base.data.buffer.length = size; put_req->mxm_req.base.completed_cb = put_completion_cb; put_req->mxm_req.base.context = put_req; put_req->mxm_req.op.mem.remote_vaddr = (intptr_t) rva; put_req->mxm_req.base.state = MXM_REQ_NEW; put_req->pe = dst; #if MXM_API < MXM_VERSION(2,0) put_req->mxm_req.base.data.buffer.memh = NULL; put_req->mxm_req.op.mem.remote_memh = NULL; #else put_req->mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey); #endif OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_puts, 1); if (mca_spml_ikrit.mxm_peers[dst]->need_fence == 0) { opal_list_append(&mca_spml_ikrit.active_peers, &mca_spml_ikrit.mxm_peers[dst]->super); mca_spml_ikrit.mxm_peers[dst]->need_fence = 1; } 
mca_spml_ikrit.mxm_peers[dst]->n_active_puts++; SPML_IKRIT_MXM_POST_SEND(put_req->mxm_req); if (need_progress) mxm_progress(mca_spml_ikrit.mxm_context); return OSHMEM_SUCCESS; }
/**
 * Run one mxm_progress() pass on the MXM context held in ompi_pml_yalla.
 *
 * The status returned by mxm_progress() is discarded.
 *
 * @return Always OMPI_SUCCESS.
 */
int mca_pml_yalla_progress(void)
{
    (void) mxm_progress(ompi_pml_yalla.mxm_context);

    return OMPI_SUCCESS;
}