/**
 * Post a non-blocking send through MXM.
 *
 * The user data described by @p convertor is packed with
 * ompi_mtl_datatype_pack() into the MTL request, the MXM send request
 * embedded in that MTL request is filled in, and the request is handed to
 * mxm_req_send().  Completion is reported through
 * ompi_mtl_mxm_send_completion_cb, which receives the MTL request as context.
 *
 * @param mtl          MTL module; asserted to be &ompi_mtl_mxm.super.
 * @param comm         Communicator used to look up the MXM mq and connection.
 * @param dest         Peer rank used for the connection lookup.
 * @param tag          Tag stored in the send request.
 * @param convertor    Convertor describing the data to send.
 * @param mode         Send mode; MCA_PML_BASE_SEND_SYNCHRONOUS adds
 *                     MXM_REQ_FLAG_SEND_SYNC to the request flags.
 * @param blocking     Not referenced by this function.
 * @param mtl_request  Caller-provided request, used as mca_mtl_mxm_request_t.
 *
 * @return OMPI_SUCCESS when the request was posted, the packing error code
 *         when packing failed, or OMPI_ERROR when mxm_req_send() failed.
 */
int
ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
                   struct ompi_communicator_t* comm,
                   int dest,
                   int tag,
                   struct opal_convertor_t *convertor,
                   mca_pml_base_send_mode_t mode,
                   bool blocking,
                   mca_mtl_request_t * mtl_request)
{
    mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *)mtl_request;
    mxm_send_req_t *mxm_send_req;
    mxm_error_t err;
    int ret;

    assert(mtl == &ompi_mtl_mxm.super);

    mtl_mxm_request->convertor = convertor;
    ret = ompi_mtl_datatype_pack(mtl_mxm_request->convertor,
                                 &mtl_mxm_request->buf,
                                 &mtl_mxm_request->length,
                                 &mtl_mxm_request->free_after);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    mxm_send_req = &mtl_mxm_request->mxm.send;

    /* prepare a send request embedded in the MTL request */
    mxm_send_req->base.state = MXM_REQ_NEW;
    mxm_send_req->base.mq = ompi_mtl_mxm_mq_lookup(comm);
    mxm_send_req->base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
    mxm_send_req->base.flags = 0;
    mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
    mxm_send_req->base.data.buffer.ptr = mtl_mxm_request->buf;
    mxm_send_req->base.data.buffer.length = mtl_mxm_request->length;
    mxm_send_req->base.data.buffer.mkey = MXM_MKEY_NONE;
    mxm_send_req->base.context = mtl_mxm_request;
    mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb;

    if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
        mxm_send_req->base.flags |= MXM_REQ_FLAG_SEND_SYNC;
    }

    mxm_send_req->opcode = MXM_REQ_OP_SEND;
    mxm_send_req->op.send.tag = tag;
    /* the sender's rank in comm travels as immediate data */
    mxm_send_req->op.send.imm_data = ompi_comm_rank(comm);

    /* post-send */
    err = mxm_req_send(mxm_send_req);
    if (MXM_OK != err) {
        orte_show_help("help-mtl-mxm.txt", "error posting send", true, 1,
                       mxm_error_string(err));

        /*
         * Release the packed copy here so it does not leak.
         * NOTE(review): assumes a request that failed to post never reaches
         * ompi_mtl_mxm_send_completion_cb.  The flag and pointer are cleared
         * so any later cleanup keyed on free_after becomes a no-op.
         */
        if (mtl_mxm_request->free_after) {
            free(mtl_mxm_request->buf);
            mtl_mxm_request->buf = NULL;
            mtl_mxm_request->free_after = false;
        }
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
/**
 * Post a non-blocking send through the PSM matched queue.
 *
 * The match tag is built from the communicator's context id, the local rank
 * and @p tag.  The data is packed with ompi_mtl_datatype_pack() and handed to
 * psm_mq_isend() with the MTL request as its context.
 *
 * @param mtl          MTL module; asserted to be &ompi_mtl_psm.super.
 * @param comm         Communicator supplying context id, local rank and peer.
 * @param dest         Peer rank in @p comm.
 * @param tag          User tag folded into the PSM match tag.
 * @param convertor    Convertor describing the data to send.
 * @param mode         Send mode; MCA_PML_BASE_SEND_SYNCHRONOUS sets
 *                     PSM_MQ_FLAG_SENDSYNC.
 * @param blocking     Not referenced by this function.
 * @param mtl_request  Caller-provided request, used as mca_mtl_psm_request_t.
 *
 * @return OMPI_SUCCESS when posted, the packing error code when packing
 *         failed, or OMPI_ERROR when psm_mq_isend() did not return PSM_OK.
 */
int
ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl,
                   struct ompi_communicator_t* comm,
                   int dest,
                   int tag,
                   struct ompi_convertor_t *convertor,
                   mca_pml_base_send_mode_t mode,
                   bool blocking,
                   mca_mtl_request_t * mtl_request)
{
    psm_error_t psm_error;
    uint64_t mqtag;
    uint32_t flags = 0;
    int ret;
    mca_mtl_psm_request_t * mtl_psm_request = (mca_mtl_psm_request_t*) mtl_request;
    /* initialised so the copy below never reads an indeterminate value
       when packing fails before writing the length */
    size_t length = 0;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_psm_endpoint_t* psm_endpoint = (mca_mtl_psm_endpoint_t*)ompi_proc->proc_pml;

    assert(mtl == &ompi_mtl_psm.super);

    mqtag = PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag);

    ret = ompi_mtl_datatype_pack(convertor,
                                 &mtl_psm_request->buf,
                                 &length,
                                 &mtl_psm_request->free_after);

    /* the request fields are filled in before the pack result is checked,
       as in the original code */
    mtl_psm_request->length = length;
    mtl_psm_request->convertor = convertor;
    mtl_psm_request->type = OMPI_MTL_PSM_ISEND;

    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
        flags |= PSM_MQ_FLAG_SENDSYNC;
    }

    psm_error = psm_mq_isend(ompi_mtl_psm.mq,
                             psm_endpoint->peer_addr,
                             flags,
                             mqtag,
                             mtl_psm_request->buf,
                             length,
                             mtl_psm_request,
                             &mtl_psm_request->psm_request);
    if (PSM_OK != psm_error) {
        /*
         * Release the packed copy so it does not leak.
         * NOTE(review): assumes a send that failed to post is never completed
         * by PSM.  The flag and pointer are cleared so any later cleanup keyed
         * on free_after becomes a no-op.
         */
        if (mtl_psm_request->free_after) {
            free(mtl_psm_request->buf);
            mtl_psm_request->buf = NULL;
            mtl_psm_request->free_after = false;
        }
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
/**
 * Send the data described by @p convertor with psm_mq_send2().
 *
 * A request structure local to this call holds the packed buffer.  The buffer
 * is released (when ompi_mtl_datatype_pack() flagged it via free_after) as
 * soon as psm_mq_send2() returns, whatever its result.
 *
 * @param mtl        MTL module; asserted to be &ompi_mtl_psm2.super.
 * @param comm       Communicator supplying context id, local rank and peer.
 * @param dest       Peer rank in @p comm.
 * @param tag        User tag folded into the PSM match tag.
 * @param convertor  Convertor describing the data to send.
 * @param mode       Send mode; MCA_PML_BASE_SEND_SYNCHRONOUS selects
 *                   PSM_MQ_FLAG_SENDSYNC.
 *
 * @return OMPI_SUCCESS when psm_mq_send2() returned PSM_OK, the packing error
 *         code when packing failed, OMPI_ERROR otherwise.
 */
int
ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl,
                   struct ompi_communicator_t* comm,
                   int dest,
                   int tag,
                   struct opal_convertor_t *convertor,
                   mca_pml_base_send_mode_t mode)
{
    psm_error_t psm_rc;
    mca_mtl_psm2_request_t req;
    psm_mq_tag_t match_tag;
    uint32_t send_flags;
    int pack_rc;
    size_t packed_len;
    ompi_proc_t *peer_proc = ompi_comm_peer_lookup(comm, dest);
    mca_mtl_psm2_endpoint_t *peer_ep =
        (mca_mtl_psm2_endpoint_t *) peer_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];

    assert(mtl == &ompi_mtl_psm2.super);

    PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, match_tag);

    pack_rc = ompi_mtl_datatype_pack(convertor,
                                     &req.buf,
                                     &packed_len,
                                     &req.free_after);

    /* request bookkeeping is filled in before the pack result is examined */
    req.length = packed_len;
    req.convertor = convertor;
    req.type = OMPI_mtl_psm2_ISEND;

    if (OMPI_SUCCESS != pack_rc) {
        return pack_rc;
    }

    send_flags = (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) ? PSM_MQ_FLAG_SENDSYNC : 0;

    psm_rc = psm_mq_send2(ompi_mtl_psm2.mq,
                          peer_ep->peer_addr,
                          send_flags,
                          &match_tag,
                          req.buf,
                          packed_len);

    /* the packed copy is no longer needed once the send call has returned */
    if (req.free_after) {
        free(req.buf);
    }

    if (PSM_OK == psm_rc) {
        return OMPI_SUCCESS;
    }
    return OMPI_ERROR;
}
/**
 * Send the data described by @p convertor over MX and wait for completion.
 *
 * The data is packed into a request local to this call, posted with
 * mx_issend() (synchronous mode) or mx_isend() (all other modes), and then
 * mx_test() is polled until it reports the request as finished.  The packed
 * buffer is freed before returning when free_after is set.
 *
 * @param mtl        MTL module; asserted to be &ompi_mtl_mx.super.
 * @param comm       Communicator supplying context id, local rank and peer.
 * @param dest       Peer rank in @p comm.
 * @param tag        User tag folded into the MX match bits.
 * @param convertor  Convertor describing the data to send.
 * @param mode       Send mode; MCA_PML_BASE_SEND_SYNCHRONOUS selects
 *                   mx_issend().
 *
 * @return OMPI_SUCCESS on completion, the packing error code when packing
 *         failed, or OMPI_ERROR when the send could not be posted.  A failing
 *         mx_test() call or an unsuccessful completion status calls abort().
 */
int
ompi_mtl_mx_send(struct mca_mtl_base_module_t* mtl,
                 struct ompi_communicator_t* comm,
                 int dest,
                 int tag,
                 struct opal_convertor_t *convertor,
                 mca_pml_base_send_mode_t mode)
{
    mx_return_t mx_return;
    uint64_t match_bits;
    mca_mtl_mx_request_t mtl_mx_request;
    size_t length;
    mx_status_t mx_status;
    uint32_t result;
    int ret;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_mx_endpoint_t* mx_endpoint =
        (mca_mtl_mx_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
    const char* where;

    assert(mtl == &ompi_mtl_mx.super);

    MX_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag);

    /* do not post a segment whose pointer/length were never produced */
    ret = ompi_mtl_datatype_pack(convertor,
                                 &mtl_mx_request.mx_segment[0].segment_ptr,
                                 &length,
                                 &mtl_mx_request.free_after);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    mtl_mx_request.mx_segment[0].segment_length = length;
    mtl_mx_request.convertor = convertor;
    mtl_mx_request.type = OMPI_MTL_MX_ISEND;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "issend bits: 0x%016" PRIu64 "\n",
                         match_bits));

    if(mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
        mx_return = mx_issend( ompi_mtl_mx.mx_endpoint,
                               mtl_mx_request.mx_segment,
                               1,
                               mx_endpoint->mx_peer_addr,
                               match_bits,
                               &mtl_mx_request,
                               &mtl_mx_request.mx_request
                               );
        where = "mx_issend";
    } else {
        mx_return = mx_isend( ompi_mtl_mx.mx_endpoint,
                              mtl_mx_request.mx_segment,
                              1,
                              mx_endpoint->mx_peer_addr,
                              match_bits,
                              &mtl_mx_request,
                              &mtl_mx_request.mx_request
                              );
        where = "mx_isend";
    }

    if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) {
        char peer_name[MX_MAX_HOSTNAME_LEN];

        /* fall back to the raw NIC id when no hostname can be resolved;
           bounded so the fallback text can never overrun peer_name */
        if(MX_SUCCESS != mx_nic_id_to_hostname( mx_endpoint->mx_peer->nic_id, peer_name)) {
            snprintf( peer_name, sizeof(peer_name), "unknown %lx nic_id",
                      (unsigned long)mx_endpoint->mx_peer->nic_id );
        }
        opal_output(ompi_mtl_base_framework.framework_output,
                    "Error in %s (error %s) sending to %s\n",
                    where, mx_strerror(mx_return), peer_name);

        /* Free buffer if needed */
        if(mtl_mx_request.free_after) {
            free(mtl_mx_request.mx_segment[0].segment_ptr);
        }
        return OMPI_ERROR;
    }

    /* poll until mx_test() reports the request as finished */
    do {
        mx_return = mx_test(ompi_mtl_mx.mx_endpoint,
                            &mtl_mx_request.mx_request,
                            &mx_status,
                            &result);
        if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) {
            opal_output(ompi_mtl_base_framework.framework_output,
                        "Error in mx_test (error %s)\n", mx_strerror(mx_return));
            abort();
        }
        if( OPAL_UNLIKELY(result && mx_status.code != MX_STATUS_SUCCESS) ) {
            opal_output(ompi_mtl_base_framework.framework_output,
                        "Error in ompi_mtl_mx_send, mx_test returned something other than MX_STATUS_SUCCESS: mx_status.code = %d.\n",
                        mx_status.code);
            abort();
        }
    } while(!result);

    /* Free buffer if needed */
    if(mtl_mx_request.free_after) {
        free(mtl_mx_request.mx_segment[0].segment_ptr);
    }

    return OMPI_SUCCESS;
}
/**
 * Post a non-blocking send over MX.
 *
 * The data is packed into the first segment of the caller's request and
 * posted with mx_issend() (synchronous mode) or mx_isend() (all other modes),
 * using the request itself as the MX context.
 *
 * @param mtl          MTL module; asserted to be &ompi_mtl_mx.super.
 * @param comm         Communicator supplying context id, local rank and peer.
 * @param dest         Peer rank in @p comm.
 * @param tag          User tag folded into the MX match bits.
 * @param convertor    Convertor describing the data to send.
 * @param mode         Send mode; MCA_PML_BASE_SEND_SYNCHRONOUS selects
 *                     mx_issend().
 * @param blocking     Not referenced by this function.
 * @param mtl_request  Caller-provided request, used as mca_mtl_mx_request_t.
 *
 * @return OMPI_SUCCESS when posted, the packing error code when packing
 *         failed, or OMPI_ERROR when the MX post call failed.
 */
int
ompi_mtl_mx_isend(struct mca_mtl_base_module_t* mtl,
                  struct ompi_communicator_t* comm,
                  int dest,
                  int tag,
                  struct opal_convertor_t *convertor,
                  mca_pml_base_send_mode_t mode,
                  bool blocking,
                  mca_mtl_request_t * mtl_request)
{
    mx_return_t mx_return;
    uint64_t match_bits;
    mca_mtl_mx_request_t * mtl_mx_request = (mca_mtl_mx_request_t*) mtl_request;
    size_t length;
    int ret;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_mx_endpoint_t* mx_endpoint =
        (mca_mtl_mx_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
    const char* where;

    assert(mtl == &ompi_mtl_mx.super);

    MX_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag);

    /* do not post a segment whose pointer/length were never produced */
    ret = ompi_mtl_datatype_pack(convertor,
                                 &mtl_mx_request->mx_segment[0].segment_ptr,
                                 &length,
                                 &mtl_mx_request->free_after);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    mtl_mx_request->mx_segment[0].segment_length = length;
    mtl_mx_request->convertor = convertor;
    mtl_mx_request->type = OMPI_MTL_MX_ISEND;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "issend bits: 0x%016" PRIu64 "\n", match_bits));

    if(mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
        mx_return = mx_issend( ompi_mtl_mx.mx_endpoint,
                               mtl_mx_request->mx_segment,
                               1,
                               mx_endpoint->mx_peer_addr,
                               match_bits,
                               mtl_mx_request,
                               &mtl_mx_request->mx_request
                               );
        where = "mx_issend";
    } else {
        mx_return = mx_isend( ompi_mtl_mx.mx_endpoint,
                              mtl_mx_request->mx_segment,
                              1,
                              mx_endpoint->mx_peer_addr,
                              match_bits,
                              mtl_mx_request,
                              &mtl_mx_request->mx_request
                              );
        where = "mx_isend";
    }

    if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) {
        char peer_name[MX_MAX_HOSTNAME_LEN];

        /* fall back to the raw NIC id when no hostname can be resolved;
           bounded so the fallback text can never overrun peer_name */
        if(MX_SUCCESS != mx_nic_id_to_hostname( mx_endpoint->mx_peer->nic_id, peer_name)) {
            snprintf( peer_name, sizeof(peer_name), "unknown %lx nic_id",
                      (unsigned long)mx_endpoint->mx_peer->nic_id );
        }
        opal_output(ompi_mtl_base_framework.framework_output,
                    "Error in %s (error %s) sending to %s\n",
                    where, mx_strerror(mx_return), peer_name);

        /*
         * Free the packed copy, as ompi_mtl_mx_send does on this same
         * failure, so it does not leak.
         * NOTE(review): assumes a send that failed to post is never completed
         * by MX.  The flag and pointer are cleared so any later cleanup keyed
         * on free_after becomes a no-op.
         */
        if (mtl_mx_request->free_after) {
            free(mtl_mx_request->mx_segment[0].segment_ptr);
            mtl_mx_request->mx_segment[0].segment_ptr = NULL;
            mtl_mx_request->free_after = false;
        }
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
/**
 * Pack the user data and start (or queue) a Portals4 send.
 *
 * The destination is addressed by logical rank when logical addressing is
 * enabled and @p comm is MPI_COMM_WORLD; otherwise the endpoint stored for
 * the peer process is used.  After packing, the request receives a fresh
 * operation count and remembers the packed buffer only when
 * ompi_mtl_datatype_pack() reported that it must be freed.
 *
 * With OMPI_MTL_PORTALS4_FLOW_CONTROL enabled, a pending-request record is
 * taken from the free list and filled in.  The send is appended to the
 * pending list (and OMPI_SUCCESS returned) when no send slot is available,
 * when other sends are already pending, or when flow control is active.
 *
 * Otherwise the message goes out through the short protocol when its length
 * is within the eager limit, and through the long protocol when it is not.
 *
 * @param mtl          MTL module, forwarded to the endpoint lookup.
 * @param comm         Communicator supplying context id and local rank.
 * @param dest         Destination rank.
 * @param tag          Message tag.
 * @param convertor    Convertor describing the data to send.
 * @param mode         Send mode, forwarded to the short protocol / pending
 *                     record.
 * @param ptl_request  Send request to initialise.
 *
 * @return The packing error code, OMPI_ERR_OUT_OF_RESOURCE when no pending
 *         record is available, OMPI_SUCCESS when the send was queued, or the
 *         result of the short/long send routine.
 */
static inline int
ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
                             struct ompi_communicator_t* comm,
                             int dest,
                             int tag,
                             struct opal_convertor_t *convertor,
                             mca_pml_base_send_mode_t mode,
                             ompi_mtl_portals4_isend_request_t* ptl_request)
{
    int ret = OMPI_SUCCESS;
    void *payload;
    size_t payload_len;
    bool must_free;
    ptl_process_t peer;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    opal_free_list_item_t *fl_item;
    ompi_mtl_portals4_pending_request_t *queued;
#endif

    /* resolve the destination address */
    if ((ompi_mtl_portals4.use_logical) && (MPI_COMM_WORLD == comm)) {
        peer.rank = dest;
    } else {
        ompi_proc_t *peer_proc = ompi_comm_peer_lookup(comm, dest);
        peer = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, peer_proc));
    }

    ret = ompi_mtl_datatype_pack(convertor, &payload, &payload_len, &must_free);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1);
    /* only remember the buffer when packing said it has to be freed */
    if (must_free) {
        ptl_request->buffer_ptr = payload;
    } else {
        ptl_request->buffer_ptr = NULL;
    }
    ptl_request->event_count = 0;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Send %lu to %x,%x of length %ld\n",
                         ptl_request->opcount,
                         peer.phys.nid,
                         peer.phys.pid,
                         (int64_t)payload_len));

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    fl_item = opal_free_list_get (&ompi_mtl_portals4.flowctl.pending_fl);
    if (NULL == fl_item) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* record everything needed to issue this send later */
    queued = (ompi_mtl_portals4_pending_request_t*) fl_item;
    ptl_request->pending = queued;
    queued->mode = mode;
    queued->start = payload;
    queued->length = payload_len;
    queued->contextid = comm->c_contextid;
    queued->tag = tag;
    queued->my_rank = comm->c_my_rank;
    queued->fc_notified = 0;
    queued->ptl_proc = peer;
    queued->ptl_request = ptl_request;

    /* try to claim a send slot; give it back and queue if none was free */
    if (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
                         &queued->super.super);
        return OMPI_SUCCESS;
    }

    /* other sends are already waiting: queue behind them and run the
       pending-list progress routine */
    if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
                         &queued->super.super);
        ompi_mtl_portals4_pending_list_progress();
        return OMPI_SUCCESS;
    }

    /* flow control is active: queue the send */
    if (OPAL_UNLIKELY(ompi_mtl_portals4.flowctl.flowctl_active)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
                         &queued->super.super);
        return OMPI_SUCCESS;
    }
#endif

    /* pick the protocol from the packed length */
    if (payload_len <= ompi_mtl_portals4.eager_limit) {
        ret = ompi_mtl_portals4_short_isend(mode,
                                            payload,
                                            payload_len,
                                            comm->c_contextid,
                                            tag,
                                            comm->c_my_rank,
                                            peer,
                                            ptl_request);
    } else {
        ret = ompi_mtl_portals4_long_isend(payload,
                                           payload_len,
                                           comm->c_contextid,
                                           tag,
                                           comm->c_my_rank,
                                           peer,
                                           ptl_request);
    }

    return ret;
}
/**
 * Post a non-blocking send over Portals.
 *
 * After packing, one of four protocols is selected:
 *   - ready send (any length): eager put, no acknowledgement;
 *   - long message (packed length above the eager limit, any non-ready
 *     mode): match entry on the read table, acknowledged put;
 *   - short synchronous message: match entry on the ack table,
 *     acknowledged put;
 *   - any other short message: eager put, no acknowledgement.
 *
 * The two eager protocols bind a free-floating memory descriptor and switch
 * the request's event callback to ompi_mtl_portals_send_progress_no_ack
 * before the put.  The two acknowledged protocols attach a match entry and
 * memory descriptor keyed by the request pointer, and keep
 * ompi_mtl_portals_send_progress as the callback.
 *
 * If any Portals call fails, whatever was set up so far is unlinked, the
 * packed buffer is freed when the request owns it (free_after), and the
 * Portals return code is translated with
 * ompi_common_portals_error_ptl_to_ompi().
 *
 * @param mtl          MTL module; asserted to be &ompi_mtl_portals.base.
 * @param comm         Communicator supplying context id, local rank and peer.
 * @param dest         Destination rank in @p comm.
 * @param tag          User tag folded into the match bits.
 * @param convertor    Convertor describing the data to send.
 * @param mode         Send mode used for protocol selection.
 * @param blocking     Not referenced by this function.
 * @param mtl_request  Caller-provided request, used as
 *                     ompi_mtl_portals_request_t.
 *
 * @return OMPI_SUCCESS when the put was issued, the packing error code when
 *         packing failed, or the translated Portals error.
 */
int
ompi_mtl_portals_isend(struct mca_mtl_base_module_t* mtl,
                       struct ompi_communicator_t* comm,
                       int dest,
                       int tag,
                       struct ompi_convertor_t *convertor,
                       mca_pml_base_send_mode_t mode,
                       bool blocking,
                       mca_mtl_request_t *mtl_request)
{
    int ret;
    int needs_ack = 0;      /* non-zero for the long and short-sync protocols */
    int is_long = 0;        /* non-zero for the long protocol only */
    ptl_match_bits_t match_bits;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    ptl_handle_me_t me_h;
    ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest);
    mca_mtl_base_endpoint_t *endpoint = (mca_mtl_base_endpoint_t*) ompi_proc->proc_pml;
    ompi_mtl_portals_request_t *ptl_request = (ompi_mtl_portals_request_t*) mtl_request;
    size_t buflen;

    assert(mtl == &ompi_mtl_portals.base);

    ret = ompi_mtl_datatype_pack(convertor, &md.start, &buflen,
                                 &(ptl_request->free_after));
    if (OMPI_SUCCESS != ret) {
        return ret;
    }
    md.length = buflen;

    ptl_request->event_callback = ompi_mtl_portals_send_progress;

    /* descriptor fields that are the same for every protocol */
    md.user_ptr = ptl_request;
    md.eq_handle = ompi_mtl_portals.ptl_eq_h;

    /* ---- protocol selection: match bits, trace output, descriptor shape ---- */
    if (MCA_PML_BASE_SEND_READY == mode) {
        /* ready send: length is irrelevant, data goes out eagerly */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank,
                          tag, PTL_READY_MSG);
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "ready send bits: 0x%016llx\n", match_bits));
    } else if (md.length > ompi_mtl_portals.eager_limit) {
        /* long message: one protocol for every non-ready mode */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank,
                          tag, PTL_LONG_MSG);
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "long send bits: 0x%016llx (%d)\n",
                             match_bits, dest));
        md.threshold = 2; /* send, {ack, get} */
        md.options = PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
        needs_ack = 1;
        is_long = 1;
    } else if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
        /* short synchronous message */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank,
                          tag, PTL_SHORT_MSG);
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "short ssend bits: 0x%016llx (%d)\n",
                             match_bits, dest));
        md.threshold = 2; /* send, {ack, put} */
        md.options = PTL_MD_OP_PUT | PTL_MD_EVENT_START_DISABLE;
        needs_ack = 1;
    } else {
        /* short, non-synchronous message */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank,
                          tag, PTL_SHORT_MSG);
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "short send bits: 0x%016llx\n", match_bits));
    }

    /* ---- eager path: bind the descriptor and put without an ack ---- */
    if (!needs_ack) {
        md.threshold = 1;
        md.options = PTL_MD_EVENT_START_DISABLE;

        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md, PTL_UNLINK, &(md_h));
        if (OMPI_SUCCESS != ret) {
            goto release_buffer;
        }

        /* the callback is only switched once the descriptor is bound */
        ptl_request->event_callback = ompi_mtl_portals_send_progress_no_ack;

        ret = PtlPut(md_h, PTL_NO_ACK_REQ, endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID, 0, match_bits, 0, 0);
        if (OMPI_SUCCESS != ret) {
            PtlMDUnlink(md_h);
            goto release_buffer;
        }

        return OMPI_SUCCESS;
    }

    /* ---- acknowledged path: the match entry and the header data both
     *      carry the request pointer ---- */
    ret = PtlMEAttach(ompi_mtl_portals.ptl_ni_h,
                      is_long ? OMPI_MTL_PORTALS_READ_TABLE_ID
                              : OMPI_MTL_PORTALS_ACK_TABLE_ID,
                      endpoint->ptl_proc,
                      (ptl_match_bits_t)(uintptr_t) ptl_request,
                      0,
                      PTL_UNLINK,
                      PTL_INS_AFTER,
                      &me_h);
    if (OMPI_SUCCESS != ret) {
        goto release_buffer;
    }

    ret = PtlMDAttach(me_h, md, PTL_UNLINK, &(md_h));
    if (OMPI_SUCCESS != ret) {
        PtlMEUnlink(me_h);
        goto release_buffer;
    }

    ret = PtlPut(md_h, PTL_ACK_REQ, endpoint->ptl_proc,
                 OMPI_MTL_PORTALS_SEND_TABLE_ID, 0, match_bits, 0,
                 (ptl_hdr_data_t)(uintptr_t) ptl_request);
    if (OMPI_SUCCESS != ret) {
        PtlMDUnlink(md_h);
        goto release_buffer;
    }

    return OMPI_SUCCESS;

release_buffer:
    /* common failure exit: drop the packed copy if we own it, then translate
       the Portals return code */
    if (ptl_request->free_after) {
        free(md.start);
    }
    return ompi_common_portals_error_ptl_to_ompi(ret);
}