Esempio n. 1
0
int MPID_nem_tcp_ckpt_continue_vc(MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_PKT_DECL_CAST(upkt, MPIDI_nem_tcp_pkt_unpause_t, unpause_pkt);
    MPIR_Request *unpause_req;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);

    unpause_pkt->type = MPIDI_NEM_PKT_NETMOD;
    unpause_pkt->subtype = MPIDI_NEM_TCP_PKT_UNPAUSE;

    mpi_errno = MPID_nem_tcp_iStartContigMsg_paused(vc, &upkt, sizeof(MPIDI_nem_tcp_pkt_unpause_t), NULL, 0, &unpause_req);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    if (unpause_req) {
        if (unpause_req->status.MPI_ERROR) MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
        MPIR_Request_free(unpause_req);
        if (mpi_errno) goto fn_fail;
    }

fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_CONTINUE_VC);
    return mpi_errno;
fn_fail:

    goto fn_exit;
}
Esempio n. 2
0
int MPIDI_nem_ckpt_start(void)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_START);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_START);

    if (checkpointing)
        goto fn_exit;

    checkpointing = TRUE;

    marker_count = MPIDI_Process.my_pg->size - 1; /* We won't receive a marker from ourselves. */

    ++current_wave;

    /* send markers to all other processes */
    /* FIXME: we're only handling processes in our pg, so no dynamic connections */
    for (i = 0; i <  MPIDI_Process.my_pg->size; ++i) {
        MPID_Request *req;
        MPIDI_VC_t *vc;
        MPIDI_CH3I_VC *vc_ch;
        MPID_PKT_DECL_CAST(upkt, MPID_nem_pkt_ckpt_marker_t, ckpt_pkt);

        /* Don't send a marker to ourselves. */
        if (i == MPIDI_Process.my_pg_rank)
            continue;
       
        MPIDI_PG_Get_vc_set_active(MPIDI_Process.my_pg, i, &vc);
        vc_ch = &vc->ch;

        MPIDI_Pkt_init(ckpt_pkt, MPIDI_NEM_PKT_CKPT_MARKER);
        ckpt_pkt->wave = current_wave;
        
        mpi_errno = MPIDI_CH3_iStartMsg(vc, ckpt_pkt, sizeof(ckpt_pkt), &req);
        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ckptpkt");
        if (req != NULL)
        {
            MPIU_ERR_CHKANDJUMP(req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**ckptpkt");
            MPID_Request_release(req);
        }

        if (!vc_ch->is_local) {
            mpi_errno = vc_ch->ckpt_pause_send_vc(vc);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        }
    }
    

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_NEM_CKPT_START);
    return mpi_errno;
fn_fail:

    goto fn_exit;
}
Esempio n. 3
0
int MPID_nem_ptl_pkt_cancel_send_req_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
                                                    MPIDI_msg_sz_t *buflen, MPID_Request **rreqp)
{
    int ret, mpi_errno = MPI_SUCCESS;
    MPIDI_nem_ptl_pkt_cancel_send_req_t *req_pkt = (MPIDI_nem_ptl_pkt_cancel_send_req_t *)pkt;
    MPID_PKT_DECL_CAST(upkt, MPIDI_nem_ptl_pkt_cancel_send_resp_t, resp_pkt);
    MPID_Request *search_req, *resp_req;
    ptl_me_t me;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
      "received cancel send req pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
                      req_pkt->sender_req_id, req_pkt->match.parts.rank,
                      req_pkt->match.parts.tag, req_pkt->match.parts.context_id));

    /* create a dummy request and search for the message */
    /* create a request */
    search_req = MPID_Request_create();
    MPID_nem_ptl_init_req(search_req);
    MPIR_ERR_CHKANDJUMP1(!search_req, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Request_create");
    MPIU_Object_set_ref(search_req, 2); /* 1 ref for progress engine and 1 ref for us */
    search_req->kind = MPID_REQUEST_MPROBE;

    /* create a dummy ME to use for searching the list */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = ( PTL_ME_OP_PUT | PTL_ME_USE_ONCE );
    me.min_free = 0;
    me.match_bits = NPTL_MATCH(req_pkt->match.parts.tag, req_pkt->match.parts.context_id, req_pkt->match.parts.rank);

    me.match_id = vc_ptl->id;
    me.ignore_bits = NPTL_MATCH_IGNORE;

    /* FIXME: this should use a custom handler that throws the data away inline */
    REQ_PTL(search_req)->event_handler = handle_mprobe;

    /* submit a search request */
    ret = PtlMESearch(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_pt, &me, PTL_SEARCH_DELETE, search_req);
    MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmesearch", "**ptlmesearch %s", MPID_nem_ptl_strerror(ret));
    DBG_MSG_MESearch("REG", vc ? vc->pg_rank : 0, me, search_req);

    /* wait for search request to complete */
    do {
        mpi_errno = MPID_nem_ptl_poll(FALSE);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    } while (!MPID_Request_is_complete(search_req));

    /* send response */
    resp_pkt->type = MPIDI_NEM_PKT_NETMOD;
    resp_pkt->subtype = MPIDI_NEM_PTL_PKT_CANCEL_SEND_RESP;
    resp_pkt->ack = REQ_PTL(search_req)->found;
    resp_pkt->sender_req_id = req_pkt->sender_req_id;

    MPID_nem_ptl_iStartContigMsg(vc, resp_pkt, sizeof(*resp_pkt), NULL,
                                 0, &resp_req);

    /* if the message was found, free the temporary buffer used to copy the data */
    if (REQ_PTL(search_req)->found)
        MPIU_Free(search_req->dev.tmpbuf);

    MPID_Request_release(search_req);
    if (resp_req != NULL)
        MPID_Request_release(resp_req);

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}