int MPIDI_nem_ib_packetized_recv_req(MPIDI_VC_t * vc, MPID_Request * rreq)
{
    int mpi_errno = MPI_SUCCESS;

    if (NULL == VC_FIELD(vc, connection)->packetized_recv) {
        VC_FIELD(vc, connection)->packetized_recv = (void *) rreq;
    } else {
        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL,
                                         FCNAME, __LINE__, MPI_ERR_OTHER,
                                         "**fail", 0);
    }
    DEBUG_PRINT("Add rreq %p to packetized recv\n", rreq);
    return mpi_errno;
}
int MPID_nem_gm_lmt_start_recv(MPIDI_VC_t *src_vc, struct iovec s_cookie,
                               struct iovec r_cookie, int *completion_ctr)
{
    int ret;
    struct iovec *s_iov;
    struct iovec *r_iov;
    int s_n_iov;
    int r_n_iov;
    int s_offset;
    int r_offset;

    s_iov = s_cookie.iov_base;
    s_n_iov = s_cookie.iov_len / sizeof(struct iovec);
    r_iov = r_cookie.iov_base;
    r_n_iov = r_cookie.iov_len / sizeof(struct iovec);
    r_offset = 0;
    s_offset = 0;

    ret = MPID_nem_gm_lmt_do_get(VC_FIELD(src_vc, gm_node_id),
                                 VC_FIELD(src_vc, gm_port_id),
                                 &r_iov, &r_n_iov, &r_offset,
                                 &s_iov, &s_n_iov, &s_offset,
                                 completion_ctr);
    if (ret == LMT_AGAIN) {
        MPID_nem_gm_lmt_queue_t *e = MPID_nem_gm_queue_alloc(lmt);
        if (!e) {
            printf("error: malloc failed\n");
            return -1;
        }
        e->node_id = VC_FIELD(src_vc, gm_node_id);
        e->port_id = VC_FIELD(src_vc, gm_port_id);
        e->r_iov = r_iov;
        e->r_n_iov = r_n_iov;
        e->r_offset = r_offset;
        e->s_iov = s_iov;
        e->s_n_iov = s_n_iov;
        e->s_offset = s_offset;
        e->compl_ctr = completion_ctr;
        MPID_nem_gm_queue_enqueue(lmt, e);
    } else if (ret == LMT_FAILURE) {
        printf("error: MPID_nem_gm_lmt_do_get() failed\n");
        return -1;
    }
    return 0;
}
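/*
 * Illustrative sketch (not part of the netmod): how an LMT cookie of the kind
 * consumed above could be built on the other side.  The cookie is itself an
 * iovec whose base points at an array of iovecs describing the data and whose
 * length is the size of that array in bytes, which is why the receiver above
 * recovers the segment count with iov_len / sizeof(struct iovec).  The helper
 * name is hypothetical and only meant to show the encoding convention.
 */
#include <sys/uio.h>

static struct iovec example_pack_lmt_cookie(struct iovec *segs, int n_segs)
{
    struct iovec cookie;
    cookie.iov_base = segs;                           /* array of data segments */
    cookie.iov_len  = n_segs * sizeof(struct iovec);  /* receiver divides by sizeof */
    return cookie;
}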
int MPID_nem_newmad_vc_init(MPIDI_VC_t *vc)
{
    MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
    char *business_card;
    int mpi_errno = MPI_SUCCESS;
    int val_max_sz;
    int ret;

#ifdef USE_PMI2_API
    val_max_sz = PMI2_MAX_VALLEN;
#else
    mpi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
#endif

    business_card = (char *) MPIU_Malloc(val_max_sz);
    mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card, val_max_sz, vc->pg);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* Very important */
    memset(VC_FIELD(vc, url), 0, MPID_NEM_NMAD_MAX_SIZE);

    mpi_errno = MPID_nem_newmad_get_from_bc(business_card, VC_FIELD(vc, url));
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    MPIU_Free(business_card);

    ret = nm_session_connect(mpid_nem_newmad_session, &(VC_FIELD(vc, p_gate)),
                             VC_FIELD(vc, url));
    if (ret != NM_ESUCCESS)
        fprintf(stdout, "nm_session_connect returned ret = %d\n", ret);

    nm_gate_ref_set(VC_FIELD(vc, p_gate), (void *) vc);

    MPIDI_CHANGE_VC_STATE(vc, ACTIVE);

    vc->eager_max_msg_sz   = 32768;
    vc->rndvSend_fn        = NULL;
    vc->sendNoncontig_fn   = MPID_nem_newmad_SendNoncontig;
    vc->comm_ops           = &comm_ops;
    vc_ch->iStartContigMsg = MPID_nem_newmad_iStartContigMsg;
    vc_ch->iSendContig     = MPID_nem_newmad_iSendContig;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
int MPID_nem_mx_vc_init(MPIDI_VC_t *vc)
{
    uint32_t threshold;
    MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
    int mpi_errno = MPI_SUCCESS;

    /* first make sure that our private fields in the vc fit into the area provided */
    MPIU_Assert(sizeof(MPID_nem_mx_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);

#ifdef ONDEMAND
    VC_FIELD(vc, local_connected)  = 0;
    VC_FIELD(vc, remote_connected) = 0;
#else
    {
        char *business_card;
        int val_max_sz;
        int ret;

#ifdef USE_PMI2_API
        val_max_sz = PMI2_MAX_VALLEN;
#else
        mpi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
#endif
        business_card = (char *) MPIU_Malloc(val_max_sz);

        mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card, val_max_sz, vc->pg);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        mpi_errno = MPID_nem_mx_get_from_bc(business_card,
                                            &VC_FIELD(vc, remote_endpoint_id),
                                            &VC_FIELD(vc, remote_nic_id));
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        MPIU_Free(business_card);

        ret = mx_connect(MPID_nem_mx_local_endpoint,
                         VC_FIELD(vc, remote_nic_id),
                         VC_FIELD(vc, remote_endpoint_id),
                         MPID_NEM_MX_FILTER, MX_INFINITE,
                         &(VC_FIELD(vc, remote_endpoint_addr)));
        MPIU_ERR_CHKANDJUMP1(ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                             "**mx_connect", "**mx_connect %s", mx_strerror(ret));
        mx_set_endpoint_addr_context(VC_FIELD(vc, remote_endpoint_addr), (void *) vc);

        MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
    }
#endif

    mx_get_info(MPID_nem_mx_local_endpoint, MX_COPY_SEND_MAX, NULL, 0,
                &threshold, sizeof(uint32_t));

    vc->eager_max_msg_sz   = threshold;
    vc->rndvSend_fn        = NULL;
    vc->sendNoncontig_fn   = MPID_nem_mx_SendNoncontig;
    vc->comm_ops           = &comm_ops;
    vc_ch->iStartContigMsg = MPID_nem_mx_iStartContigMsg;
    vc_ch->iSendContig     = MPID_nem_mx_iSendContig;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
int MPIDI_nem_ib_recv_addr(MPIDI_VC_t * vc, void *vstart)
{
    MPIDI_nem_ib_pkt_address *pkt = vstart;
    int i;
    int ret;

#ifdef _ENABLE_XRC_
    if (USE_XRC && (0 == MPIDI_CH3I_RDMA_Process.xrc_rdmafp ||
                    VC_XST_ISSET(vc, XF_CONN_CLOSING)))
        return 1;
#endif

    DEBUG_PRINT("set rdma address, dma address %p\n", (void *) pkt->rdma_address);

    /* check if it has accepted the maximum allowed connections */
    if (rdma_fp_sendconn_accepted >= rdma_polling_set_limit) {
        vbuf_address_reply_send(vc, RDMA_FP_MAX_SEND_CONN_REACHED);
        goto fn_exit;
    }

    if (pkt->rdma_address != 0) {
        /* Allocating the send vbufs for the eager RDMA flow */
        ret = vbuf_fast_rdma_alloc(vc, 0);
        if (ret == MPI_SUCCESS) {
            for (i = 0; i < ib_hca_num_hcas; i++) {
                VC_FIELD(vc, connection)->rfp.RDMA_remote_buf_rkey[i] = pkt->rdma_hndl[i];
            }
            VC_FIELD(vc, connection)->rfp.remote_RDMA_buf = (void *) pkt->rdma_address;
            vbuf_address_reply_send(vc, RDMA_FP_SUCCESS);
            rdma_fp_sendconn_accepted++;
        } else {
            vbuf_address_reply_send(vc, RDMA_FP_SENDBUFF_ALLOC_FAILED);
            return -1;
        }
    }

 fn_exit:
    return MPI_SUCCESS;
}
int MPID_nem_newmad_cancel_send(MPIDI_VC_t *vc, MPID_Request *sreq)
{
    nm_sr_request_t *nmad_req = NULL;
    int mpi_errno = MPI_SUCCESS;
    int ret;

    nmad_req = &(REQ_FIELD(sreq, newmad_req));
    ret = nm_sr_scancel(mpid_nem_newmad_session, nmad_req);
    if (ret == NM_ESUCCESS) {
        sreq->status.cancelled = TRUE;
        (VC_FIELD(vc, pending_sends)) -= 1;
    } else {
        sreq->status.cancelled = FALSE;
    }

 fn_exit:
    return mpi_errno;
 fn_fail: ATTRIBUTE((unused))
    goto fn_exit;
}
int MPID_nem_newmad_directRecv(MPIDI_VC_t *vc, MPID_Request *rreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_NEWMAD_DIRECTRECV);
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_NEWMAD_DIRECTRECV);

    if (!VC_CH(vc)->is_local) {
        nm_tag_t match_info = 0;
        nm_tag_t match_mask = NEM_NMAD_MATCH_FULL_MASK;
        MPIR_Rank_t source = rreq->dev.match.parts.rank;
        MPIR_Context_id_t context = rreq->dev.match.parts.context_id;
        Nmad_Nem_tag_t tag = rreq->dev.match.parts.tag;
        int ret;
        MPIDI_msg_sz_t data_sz;
        int dt_contig;
        MPI_Aint dt_true_lb;
        MPID_Datatype *dt_ptr;

        NEM_NMAD_DIRECT_MATCH(match_info, 0, source, context);
        if (tag != MPI_ANY_TAG) {
            NEM_NMAD_SET_TAG(match_info, tag);
        } else {
            NEM_NMAD_SET_ANYTAG(match_info);
            NEM_NMAD_SET_ANYTAG(match_mask);
        }

#ifdef DEBUG
        fprintf(stdout, "========> Posting Recv req %p (match is %lx) \n", rreq, match_info);
#endif
        MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype,
                                dt_contig, data_sz, dt_ptr, dt_true_lb);
        rreq->dev.OnDataAvail = NULL;

        if (dt_contig) {
            ret = nm_sr_irecv_with_ref_tagged(mpid_nem_newmad_session, VC_FIELD(vc, p_gate),
                                              match_info, match_mask,
                                              (char *) (rreq->dev.user_buf) + dt_true_lb,
                                              data_sz,
                                              &(REQ_FIELD(rreq, newmad_req)), (void *) rreq);
            REQ_FIELD(rreq, iov) = NULL;
        } else {
            int num_seg = 0;
            struct iovec *newmad_iov =
                (struct iovec *) MPIU_Malloc(NMAD_IOV_MAX_DEPTH * sizeof(struct iovec));
            struct iovec *newmad_iov_ptr = &(newmad_iov[0]);

            MPID_nem_newmad_process_rdtype(&rreq, dt_ptr, data_sz, &newmad_iov_ptr, &num_seg);
            MPIU_Assert(num_seg <= NMAD_IOV_MAX_DEPTH);
#ifdef DEBUG
            {
                int index;
                for (index = 0; index < num_seg; index++) {
                    fprintf(stdout, "======================\n");
                    fprintf(stdout, "RECV nmad_iov[%i]: [base %p][len %i]\n", index,
                            newmad_iov[index].iov_base, newmad_iov[index].iov_len);
                }
            }
#endif
            ret = nm_sr_irecv_iov_with_ref_tagged(mpid_nem_newmad_session, VC_FIELD(vc, p_gate),
                                                  match_info, match_mask,
                                                  newmad_iov, num_seg,
                                                  &(REQ_FIELD(rreq, newmad_req)), (void *) rreq);
            REQ_FIELD(rreq, iov) = newmad_iov;
        }
    } else {
        /* Fixme : this might not work in the case of multiple netmods */
        memset((&(REQ_FIELD(rreq, newmad_req))), 0, sizeof(nm_sr_request_t));
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_NEWMAD_DIRECTRECV);
    return mpi_errno;
 fn_fail: ATTRIBUTE((unused))
    goto fn_exit;
}
/**
 * FIXME: Ideally the header size should be determined by high level macros,
 * instead of hacking the message header at the device layer
 */
int MPIDI_CH3I_nem_ib_parse_header(MPIDI_VC_t * vc, vbuf * v, void **pkt, int *header_size)
{
    void *vstart;
    MPIDI_nem_ib_pkt_comm_header *header;
#ifdef CRC_CHECK
    unsigned long crc;
#endif
    int mpi_errno = MPI_SUCCESS;
    int ret;

    DEBUG_PRINT("[parse header] vbuf address %p\n", v);
    vstart = v->pheader;
    header = (MPIDI_nem_ib_pkt_comm_header *) v->iheader;
    DEBUG_PRINT("[parse header] header type %d\n", header->type);

    /* set it to the header size by default */
    *header_size = sizeof(MPIDI_CH3_Pkt_t);

#ifdef CRC_CHECK
    crc = update_crc(1, (void *) ((uintptr_t) header + sizeof *header),
                     v->content_size - sizeof *header);
    if (crc != header->mrail.crc) {
        int rank;
        PMI_Get_rank(&rank);
        MPIU_Error_printf(stderr,
                          "CRC mismatch, get %lx, should be %lx "
                          "type %d, content size %d\n",
                          crc, header->mrail.crc, header->type, v->content_size);
        exit(EXIT_FAILURE);
    }
#endif

    switch (header->type) {
/* header caching codes */
#ifndef MV2_DISABLE_HEADER_CACHING
    case (MPIDI_CH3_PKT_FAST_EAGER_SEND):
    case (MPIDI_CH3_PKT_FAST_EAGER_SEND_WITH_REQ):
        {
            /* since header caching does not use the regular iheader,
             * revert the earlier pre-adjustment */
            v->content_size += IB_PKT_HEADER_LENGTH;
            vstart -= IB_PKT_HEADER_LENGTH;
            v->pheader -= IB_PKT_HEADER_LENGTH;

            MPIDI_nem_ib_pkt_fast_eager *fast_header = vstart;
            MPIDI_CH3_Pkt_eager_send_t *eager_header =
                (MPIDI_CH3_Pkt_eager_send_t *) VC_FIELD(vc, connection)->rfp.cached_incoming;
            MPIDI_nem_ib_pkt_comm_header *eager_iheader =
                (MPIDI_nem_ib_pkt_comm_header *) VC_FIELD(vc, connection)->rfp.cached_incoming_iheader;

            if (MPIDI_CH3_PKT_FAST_EAGER_SEND == header->type) {
                *header_size = sizeof(MPIDI_nem_ib_pkt_fast_eager);
            } else {
                *header_size = sizeof(MPIDI_nem_ib_pkt_fast_eager_with_req);
                eager_header->sender_req_id =
                    ((MPIDI_nem_ib_pkt_fast_eager_with_req *) vstart)->sender_req_id;
            }

            header = eager_iheader;
            DEBUG_PRINT("[receiver side] cached credit %d\n", eager_iheader->rdma_credit);
            eager_header->data_sz = fast_header->bytes_in_pkt;
            *pkt = (void *) eager_header;
            DEBUG_PRINT("[recv: parse header] faster headersize returned %d\n", *header_size);
        }
        break;
#endif
    case (MPIDI_CH3_PKT_EAGER_SEND):
        {
            DEBUG_PRINT("[recv: parse header] pkt eager send\n");
/* header caching codes */
#ifndef MV2_DISABLE_HEADER_CACHING
            if (v->padding != NORMAL_VBUF_FLAG &&
                (v->content_size - sizeof(MPIDI_CH3_Pkt_t) <= MAX_SIZE_WITH_HEADER_CACHING)) {
                /* Only cache the header if the packet is from the RDMA path
                 * XXXX: what is R3_FLAG? */
                MPIU_Memcpy((VC_FIELD(vc, connection)->rfp.cached_incoming), vstart,
                            sizeof(MPIDI_CH3_Pkt_eager_send_t));
                MPIU_Memcpy((VC_FIELD(vc, connection)->rfp.cached_incoming_iheader), header,
                            sizeof(MPIDI_nem_ib_pkt_comm_header));
            }
#endif
            *pkt = (MPIDI_CH3_Pkt_t *) vstart;
            *header_size = sizeof(MPIDI_CH3_Pkt_t);
            DEBUG_PRINT("[recv: parse header] headersize returned %d\n", *header_size);
        }
        break;
    case (MPIDI_CH3_PKT_RNDV_REQ_TO_SEND):
    case (MPIDI_CH3_PKT_RNDV_CLR_TO_SEND):
    case MPIDI_CH3_PKT_EAGER_SYNC_ACK:
    case MPIDI_NEM_PKT_LMT_RTS:
    case MPIDI_NEM_PKT_LMT_CTS:
    case MPIDI_NEM_PKT_LMT_DONE:
    case MPIDI_NEM_PKT_LMT_COOKIE:
/* CKPT codes */
#ifdef CKPT
    case MPIDI_CH3_PKT_CM_SUSPEND:
    case MPIDI_CH3_PKT_CM_REACTIVATION_DONE:
    case MPIDI_CH3_PKT_CR_REMOTE_UPDATE:
#endif
        {
            *pkt = vstart;
        }
        break;
    case MPIDI_CH3_PKT_CANCEL_SEND_REQ:
        {
            *pkt = vstart;
            /* Fix: Need to unregister and free the rndv buffer in the get protocol. */
        }
        break;
    case MPIDI_CH3_PKT_CANCEL_SEND_RESP:
        {
            MPID_Request *req;
            *pkt = vstart;
            MPID_Request_get_ptr(((MPIDI_CH3_Pkt_cancel_send_resp_t *) (*pkt))->sender_req_id, req);
            if (req != NULL) {
                /* unregister and free the rndv buffer */
                MPIDI_NEM_IB_RREQ_RNDV_FINISH(req);
            }
        }
        break;
    case (MPIDI_CH3_PKT_NOOP):
        {
            *pkt = v->iheader;
        }
        break;
/* rfp codes */
    case MPIDI_CH3_PKT_ADDRESS:
        {
            *pkt = v->iheader;
            MPIDI_nem_ib_recv_addr(vc, vstart);
        }
        break;
    case MPIDI_CH3_PKT_ADDRESS_REPLY:
        {
            *pkt = v->iheader;
            MPIDI_nem_ib_recv_addr_reply(vc, vstart);
        }
        break;
    case MPIDI_CH3_PKT_PACKETIZED_SEND_START:
        {
            *pkt = vstart;
            *header_size = sizeof(MPIDI_CH3_Pkt_packetized_send_start_t);
        }
        break;
    case MPIDI_CH3_PKT_PACKETIZED_SEND_DATA:
        {
            *header_size = sizeof(MPIDI_CH3_Pkt_packetized_send_data_t);
            *pkt = vstart;
        }
        break;
    case MPIDI_CH3_PKT_RNDV_R3_DATA:
        {
            *header_size = sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t);
            *pkt = vstart;
        }
        break;
    case MPIDI_CH3_PKT_RNDV_R3_ACK:
        {
            *pkt = v->iheader;
            MPIDI_nem_ib_lmt_r3_recv_ack(vc, vstart);
        }
        break;
#if defined(USE_EAGER_SHORT)
    case MPIDI_CH3_PKT_EAGERSHORT_SEND:
#endif
    case MPIDI_CH3_PKT_EAGER_SYNC_SEND:
    case MPIDI_CH3_PKT_READY_SEND:
    case MPIDI_CH3_PKT_PUT:
    case MPIDI_CH3_PKT_GET:
    case MPIDI_CH3_PKT_GET_RESP:        /* 15 */
    case MPIDI_CH3_PKT_ACCUMULATE:
    case MPIDI_CH3_PKT_LOCK:
    case MPIDI_CH3_PKT_LOCK_GRANTED:
    case MPIDI_CH3_PKT_PT_RMA_DONE:
    case MPIDI_CH3_PKT_LOCK_PUT_UNLOCK:
    case MPIDI_CH3_PKT_LOCK_GET_UNLOCK:
    case MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK:
    case MPIDI_CH3_PKT_ACCUM_IMMED:
    case MPIDI_CH3_PKT_FLOW_CNTL_UPDATE:
    case MPIDI_CH3_PKT_CLOSE:
        {
            *pkt = vstart;
        }
        break;
    default:
        {
            /* Header is corrupted if control has reached here in prototype */
            MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**fail",
                                      "**fail %s %d",
                                      "Control shouldn't reach here in prototype, header %d\n",
                                      header->type);
        }
    }

    DEBUG_PRINT("Before set credit, vc: %p, v->rail: %d, pkt: %p, pheader: %p\n",
                vc, v->rail, pkt, v->pheader);

    SET_CREDIT(header, VC_FIELD(vc, connection), (v->rail));

    if (VC_FIELD(vc, connection)->srp.credits[v->rail].remote_credit > 0 &&
        VC_FIELD(vc, connection)->srp.credits[v->rail].backlog.len > 0) {
        /* backlog send codes */
        MRAILI_Backlog_send(vc, v->rail);
    }

    /* if any credits remain, schedule rendezvous progress */
    if ((VC_FIELD(vc, connection)->srp.credits[v->rail].remote_credit > 0
/* rfp codes */
         || (VC_FIELD(vc, connection)->rfp.ptail_RDMA_send !=
             VC_FIELD(vc, connection)->rfp.phead_RDMA_send))
        && (VC_FIELD(vc, connection)->sreq_head != NULL)) {
/* rndv codes */
#if 0
        PUSH_FLOWLIST(vc);
#endif
    }

/* rfp codes */
    if ((VC_FIELD(vc, connection)->rfp.RDMA_recv_buf == NULL) &&
        /* (c->initialized) && */
        num_rdma_buffer && !VC_FIELD(vc, connection)->rfp.rdma_failed) {
        if ((process_info.polling_group_size + rdma_pending_conn_request) <
            rdma_polling_set_limit) {
            VC_FIELD(vc, connection)->rfp.eager_start_cnt++;
            if (rdma_polling_set_threshold <
                VC_FIELD(vc, connection)->rfp.eager_start_cnt) {
                ret = vbuf_fast_rdma_alloc(vc, 1);
                if (ret == MPI_SUCCESS) {
                    vbuf_address_send(vc);
                    rdma_pending_conn_request++;
                } else {
                    VC_FIELD(vc, connection)->rfp.rdma_failed = 1;
                }
                goto fn_exit;
            }
        }
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
int MPIDI_nem_ib_packetized_recv_data(MPIDI_VC_t * vc, vbuf *v)
{
    int mpi_errno = MPI_SUCCESS;
    int skipsize = sizeof(MPIDI_CH3_Pkt_packetized_send_data_t);
    int nb, complete;
    MPID_Request *rreq = VC_FIELD(vc, connection)->packetized_recv;

    if (NULL == VC_FIELD(vc, connection)->packetized_recv) {
        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL,
                                         FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
        goto fn_exit;
    }

    DEBUG_PRINT("[pkt recv], rreq %p, offset %d, count %d\n",
                rreq, rreq->dev.iov_offset, rreq->dev.iov_count);

    mpi_errno = MPIDI_nem_ib_fill_request(rreq, v, skipsize, &nb);
    if (mpi_errno != MPI_SUCCESS) {
        mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL,
                                         FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
        goto fn_exit;
    }
    skipsize += nb;

    if (MPIDI_nem_ib_request_adjust_iov(rreq, nb)) {
        mpi_errno = MPIDI_CH3U_Handle_recv_req(vc, rreq, &complete);
        DEBUG_PRINT("[recv: handle read] adjust req fine, complete %d\n", complete);
        if (mpi_errno != MPI_SUCCESS) {
            mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE,
                                             FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
            goto fn_exit;
        }

        while (complete != TRUE) {
            mpi_errno = MPIDI_nem_ib_fill_request(rreq, v, skipsize, &nb);
            if (mpi_errno != MPI_SUCCESS) {
                mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL,
                                                 FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
                goto fn_exit;
            }
            if (!MPIDI_nem_ib_request_adjust_iov(rreq, nb)) {
                goto fn_exit;
            }
            skipsize += nb;

            mpi_errno = MPIDI_CH3U_Handle_recv_req(vc, rreq, &complete);
            DEBUG_PRINT("[recv: handle read] adjust req fine, complete %d\n", complete);
            if (mpi_errno != MPI_SUCCESS) {
                mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE,
                                                 FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0);
                goto fn_exit;
            }
        }

        if (TRUE == complete) {
            VC_FIELD(vc, connection)->packetized_recv = NULL;
        }
    }

 fn_exit:
    return mpi_errno;
}
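/*
 * Illustrative sketch (an assumption, not the MVAPICH implementation): the
 * kind of bookkeeping a helper like MPIDI_nem_ib_request_adjust_iov performs
 * in the loop above -- advance an iovec array by nb freshly received bytes
 * and report whether every segment has been consumed.  Names and signature
 * are hypothetical and shown only to clarify the receive-loop pattern.
 */
#include <stddef.h>
#include <sys/uio.h>

static int example_adjust_iov(struct iovec *iov, int *offset, int count, size_t nb)
{
    while (*offset < count && nb > 0) {
        size_t take = nb < iov[*offset].iov_len ? nb : iov[*offset].iov_len;
        iov[*offset].iov_base = (char *) iov[*offset].iov_base + take;
        iov[*offset].iov_len -= take;
        nb -= take;
        if (iov[*offset].iov_len == 0)
            (*offset)++;            /* this segment is full, move to the next */
    }
    return (*offset == count);      /* nonzero once the whole iov set is filled */
}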
int MPIDI_nem_ib_recv_addr_reply(MPIDI_VC_t * vc, void *vstart)
{
    int hca_index;
    int ret;
    MPIDI_nem_ib_pkt_address_reply *pkt = vstart;

    DEBUG_PRINT("Received addr reply packet. reply data :%d\n", pkt->reply_data);

    if (pkt->reply_data == RDMA_FP_SENDBUFF_ALLOC_FAILED ||
        pkt->reply_data == RDMA_FP_MAX_SEND_CONN_REACHED) {
        DEBUG_PRINT("RDMA FP setup failed. clean up recv buffers\n");

        /* de-register the recv buffers */
        for (hca_index = 0; hca_index < ib_hca_num_hcas; hca_index++) {
            if (VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_mr[hca_index]) {
                ret = deregister_memory(VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_mr[hca_index]);
                if (ret) {
                    MPIU_Error_printf("Failed to deregister mr (%d)\n", ret);
                } else {
                    VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_mr[hca_index] = NULL;
                }
            }
        }

        /* deallocate recv RDMA buffers */
        if (VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_DMA) {
            MPIU_Free(VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_DMA);
            VC_FIELD(vc, connection)->rfp.RDMA_recv_buf_DMA = NULL;
        }

        /* deallocate vbuf struct buffers */
        if (VC_FIELD(vc, connection)->rfp.RDMA_recv_buf) {
            MPIU_Free(VC_FIELD(vc, connection)->rfp.RDMA_recv_buf);
            VC_FIELD(vc, connection)->rfp.RDMA_recv_buf = NULL;
        }

        /* set flag to mark that FP setup failed or was rejected;
         * we shouldn't try again on this vc */
        VC_FIELD(vc, connection)->rfp.rdma_failed = 1;
    } else if (pkt->reply_data == RDMA_FP_SUCCESS) {
        /* set pointers */
        VC_FIELD(vc, connection)->rfp.p_RDMA_recv = 0;
        VC_FIELD(vc, connection)->rfp.p_RDMA_recv_tail = num_rdma_buffer - 1;

        /* Add the connection to the RDMA polling list */
        MPIU_Assert(process_info.polling_group_size < rdma_polling_set_limit);
        process_info.polling_set[process_info.polling_group_size] = vc;
        process_info.polling_group_size++;

        VC_FIELD(vc, cmanager)->num_channels += 1;
        VC_FIELD(vc, cmanager)->num_local_pollings = 1;
        VC_FIELD(vc, connection)->rfp.in_polling_set = 1;
    } else {
        ibv_va_error_abort(GEN_EXIT_ERR,
                           "Invalid reply data received. reply_data: %d\n",
                           pkt->reply_data);
    }

    rdma_pending_conn_request--;
    return MPI_SUCCESS;
}
int MPID_nem_newmad_iprobe(MPIDI_VC_t *vc, int source, int tag, MPID_Comm *comm,
                           int context_offset, int *flag, MPI_Status *status)
{
    nm_tag_t match_info = 0;
    nm_tag_t match_mask = NEM_NMAD_MATCH_FULL_MASK;
    nm_gate_t out_gate;
    nm_gate_t in_gate;
    nm_tag_t out_tag;
    int size;
    int mpi_errno = MPI_SUCCESS;
    int ret;

    NEM_NMAD_SET_CTXT(match_info, comm->context_id + context_offset);

    if (source == MPI_ANY_SOURCE) {
        NEM_NMAD_SET_ANYSRC(match_info);
        NEM_NMAD_SET_ANYSRC(match_mask);
        in_gate = NM_ANY_GATE;
    } else {
        NEM_NMAD_SET_SRC(match_info, source);
        in_gate = VC_FIELD(vc, p_gate);
    }

    if (tag != MPI_ANY_TAG) {
        NEM_NMAD_SET_TAG(match_info, tag);
    } else {
        NEM_NMAD_SET_ANYTAG(match_info);
        NEM_NMAD_SET_ANYTAG(match_mask);
    }

    ret = nm_sr_probe(mpid_nem_newmad_session, in_gate, &out_gate,
                      match_info, match_mask, &out_tag, &size);
    if (ret == NM_ESUCCESS) {
        if (source != MPI_ANY_SOURCE)
            status->MPI_SOURCE = source;
        else {
            MPIDI_VC_t *vc;
            int index;
            vc = (MPIDI_VC_t *) nm_gate_ref_get(out_gate);
            for (index = 0; index < comm->local_size; index++)
                if (vc == comm->vcr[index])
                    break;
            status->MPI_SOURCE = index;
        }
        if (tag != MPI_ANY_TAG)
            status->MPI_TAG = tag;
        else
            NEM_NMAD_MATCH_GET_TAG(out_tag, status->MPI_TAG);
        status->count = size;
        *flag = TRUE;
    } else
        *flag = FALSE;

 fn_exit:
    return mpi_errno;
 fn_fail: ATTRIBUTE((unused))
    goto fn_exit;
}
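/*
 * Illustrative sketch (assumptions, not the actual NEM_NMAD_SET_* macros):
 * one way the probe above could pack (context, source, tag) into a single
 * 64-bit match word, with the companion mask widened for MPI_ANY_SOURCE /
 * MPI_ANY_TAG.  The field widths and helper name are assumptions chosen
 * purely for illustration of the bit-packing idea.
 */
#include <stdint.h>

#define EX_TAG_BITS 32
#define EX_SRC_BITS 16

static uint64_t example_pack_match(uint16_t context, uint16_t source, uint32_t tag)
{
    return ((uint64_t) context << (EX_TAG_BITS + EX_SRC_BITS))   /* high 16 bits */
         | ((uint64_t) source  << EX_TAG_BITS)                   /* middle 16 bits */
         | (uint64_t) tag;                                       /* low 32 bits */
}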