/*
 * MPID_nem_tcp_ckpt_continue_vc - send an UNPAUSE netmod packet on a VC.
 *
 * Builds a packet of type MPIDI_NEM_PKT_NETMOD / subtype
 * MPIDI_NEM_TCP_PKT_UNPAUSE and hands it to
 * MPID_nem_tcp_iStartContigMsg_paused().  If the send returns a request,
 * the request is always freed here; a non-zero status on that request is
 * reported as MPI_ERR_OTHER ("**fail").
 *
 * vc: the virtual connection on which the packet is sent.
 *
 * Returns MPI_SUCCESS, or an MPI error code if starting the send failed or
 * the returned request carries an error status.
 */
int MPID_nem_tcp_ckpt_continue_vc(MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_PKT_DECL_CAST(upkt, MPIDI_nem_tcp_pkt_unpause_t, unpause_pkt);
    MPIR_Request *sreq;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);

    /* Fill in the packet header. */
    unpause_pkt->type = MPIDI_NEM_PKT_NETMOD;
    unpause_pkt->subtype = MPIDI_NEM_TCP_PKT_UNPAUSE;

    mpi_errno = MPID_nem_tcp_iStartContigMsg_paused(vc, &upkt,
                                                    sizeof(MPIDI_nem_tcp_pkt_unpause_t),
                                                    NULL, 0, &sreq);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    /* No request was handed back, so there is nothing left to clean up. */
    if (!sreq) {
        goto fn_exit;
    }

    /* Record any error first, but free the request before bailing out. */
    if (sreq->status.MPI_ERROR) {
        MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
    }
    MPIR_Request_free(sreq);
    if (mpi_errno) {
        goto fn_fail;
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
int MPIDI_nem_ckpt_start(void) { int mpi_errno = MPI_SUCCESS; int i; MPIDI_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_START); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_START); if (checkpointing) goto fn_exit; checkpointing = TRUE; marker_count = MPIDI_Process.my_pg->size - 1; /* We won't receive a marker from ourselves. */ ++current_wave; /* send markers to all other processes */ /* FIXME: we're only handling processes in our pg, so no dynamic connections */ for (i = 0; i < MPIDI_Process.my_pg->size; ++i) { MPID_Request *req; MPIDI_VC_t *vc; MPIDI_CH3I_VC *vc_ch; MPID_PKT_DECL_CAST(upkt, MPID_nem_pkt_ckpt_marker_t, ckpt_pkt); /* Don't send a marker to ourselves. */ if (i == MPIDI_Process.my_pg_rank) continue; MPIDI_PG_Get_vc_set_active(MPIDI_Process.my_pg, i, &vc); vc_ch = &vc->ch; MPIDI_Pkt_init(ckpt_pkt, MPIDI_NEM_PKT_CKPT_MARKER); ckpt_pkt->wave = current_wave; mpi_errno = MPIDI_CH3_iStartMsg(vc, ckpt_pkt, sizeof(ckpt_pkt), &req); MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ckptpkt"); if (req != NULL) { MPIU_ERR_CHKANDJUMP(req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**ckptpkt"); MPID_Request_release(req); } if (!vc_ch->is_local) { mpi_errno = vc_ch->ckpt_pause_send_vc(vc); if (mpi_errno) MPIU_ERR_POP(mpi_errno); } } fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_NEM_CKPT_START); return mpi_errno; fn_fail: goto fn_exit; }
/*
 * MPID_nem_ptl_pkt_cancel_send_req_handler - handle a "cancel send" request
 * packet.
 *
 * Creates a temporary MPROBE-kind request and submits a PtlMESearch() with
 * PTL_SEARCH_DELETE, using match bits built from the tag, context id and
 * rank in the incoming packet.  It polls the netmod until that search
 * request completes, then sends a CANCEL_SEND_RESP packet back on `vc`
 * whose `ack` field is the search request's `found` flag.
 *
 * vc:     connection the request arrived on; the response is sent on it.
 * pkt:    incoming packet, interpreted as MPIDI_nem_ptl_pkt_cancel_send_req_t.
 * buflen: not accessed by this function.
 * rreqp:  not accessed by this function.
 *
 * NOTE(review): *buflen and *rreqp are left untouched here; confirm that
 * callers of this handler do not expect them to be set.
 *
 * Returns MPI_SUCCESS, or an MPI error code if the request cannot be
 * allocated, the search cannot be submitted, polling fails, or the response
 * cannot be sent.
 */
int MPID_nem_ptl_pkt_cancel_send_req_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
{
    int ret, mpi_errno = MPI_SUCCESS;
    MPIDI_nem_ptl_pkt_cancel_send_req_t *req_pkt = (MPIDI_nem_ptl_pkt_cancel_send_req_t *)pkt;
    MPID_PKT_DECL_CAST(upkt, MPIDI_nem_ptl_pkt_cancel_send_resp_t, resp_pkt);
    /* resp_req starts as NULL so the cleanup below is safe even if the send
     * fails without writing it. */
    MPID_Request *search_req, *resp_req = NULL;
    ptl_me_t me;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST, "received cancel send req pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
                                        req_pkt->sender_req_id, req_pkt->match.parts.rank,
                                        req_pkt->match.parts.tag, req_pkt->match.parts.context_id));

    /* create a dummy request and search for the message */
    /* create a request; check the allocation before touching it */
    search_req = MPID_Request_create();
    MPIR_ERR_CHKANDJUMP1(!search_req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Request_create");
    MPID_nem_ptl_init_req(search_req);
    MPIU_Object_set_ref(search_req, 2); /* 1 ref for progress engine and 1 ref for us */
    search_req->kind = MPID_REQUEST_MPROBE;

    /* create a dummy ME to use for searching the list */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE );
    me.min_free = 0;
    me.match_bits = NPTL_MATCH(req_pkt->match.parts.tag, req_pkt->match.parts.context_id, req_pkt->match.parts.rank);
    me.match_id = vc_ptl->id;
    me.ignore_bits = NPTL_MATCH_IGNORE;

    /* FIXME: this should use a custom handler that throws the data away inline */
    REQ_PTL(search_req)->event_handler = handle_mprobe;

    /* submit a search request */
    ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, search_req);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, search_req);

    /* wait for search request to complete */
    do {
        mpi_errno = MPID_nem_ptl_poll(FALSE);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    } while (!MPID_Request_is_complete(search_req));

    /* send response */
    resp_pkt->type = MPIDI_NEM_PKT_NETMOD;
    resp_pkt->subtype = MPIDI_NEM_PTL_PKT_CANCEL_SEND_RESP;
    resp_pkt->ack = REQ_PTL(search_req)->found;
    resp_pkt->sender_req_id = req_pkt->sender_req_id;

    /* Capture the send status, but release our resources before acting on
     * it so that nothing is leaked when the send fails. */
    mpi_errno = MPID_nem_ptl_iStartContigMsg(vc, resp_pkt, sizeof(*resp_pkt), NULL, 0, &resp_req);

    /* if the message was found, free the temporary buffer used to copy the data */
    if (REQ_PTL(search_req)->found)
        MPIU_Free(search_req->dev.tmpbuf);

    MPID_Request_release(search_req);
    if (resp_req != NULL)
        MPID_Request_release(resp_req);

    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}