Пример #1
0
int MPIDI_CH3_RecvRndv( MPIDI_VC_t * vc, MPID_Request *rreq )
{
    int mpi_errno = MPI_SUCCESS;

    /* A rendezvous request-to-send (RTS) message has arrived.  We need
       to send a CTS message to the remote process. */
    MPID_Request * cts_req;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;
    
    MPIU_DBG_MSG(CH3_OTHER,VERBOSE,
		 "rndv RTS in the request, sending rndv CTS");
    
    MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
    cts_pkt->sender_req_id = rreq->dev.sender_req_id;
    cts_pkt->receiver_req_id = rreq->handle;
    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, cts_pkt, sizeof(*cts_pkt), &cts_req);
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    if (mpi_errno != MPI_SUCCESS) {
	MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**ch3|ctspkt");
    }
    if (cts_req != NULL)
    {
	/* FIXME: Ideally we could specify that a req not be returned.  
	   This would avoid our having to decrement the
	   reference count on a req we don't want/need. */
	MPID_Request_release(cts_req);
    }

 fn_fail:    
    return mpi_errno;
}
Пример #2
0
int MPIDI_nem_ckpt_start(void)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_START);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_START);

    if (checkpointing)
        goto fn_exit;

    checkpointing = TRUE;

    marker_count = MPIDI_Process.my_pg->size - 1; /* We won't receive a marker from ourselves. */

    ++current_wave;

    /* send markers to all other processes */
    /* FIXME: we're only handling processes in our pg, so no dynamic connections */
    for (i = 0; i <  MPIDI_Process.my_pg->size; ++i) {
        MPID_Request *req;
        MPIDI_VC_t *vc;
        MPIDI_CH3I_VC *vc_ch;
        MPID_PKT_DECL_CAST(upkt, MPID_nem_pkt_ckpt_marker_t, ckpt_pkt);

        /* Don't send a marker to ourselves. */
        if (i == MPIDI_Process.my_pg_rank)
            continue;
       
        MPIDI_PG_Get_vc_set_active(MPIDI_Process.my_pg, i, &vc);
        vc_ch = &vc->ch;

        MPIDI_Pkt_init(ckpt_pkt, MPIDI_NEM_PKT_CKPT_MARKER);
        ckpt_pkt->wave = current_wave;
        
        mpi_errno = MPIDI_CH3_iStartMsg(vc, ckpt_pkt, sizeof(ckpt_pkt), &req);
        MPIU_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ckptpkt");
        if (req != NULL)
        {
            MPIU_ERR_CHKANDJUMP(req->status.MPI_ERROR, mpi_errno, MPI_ERR_OTHER, "**ckptpkt");
            MPID_Request_release(req);
        }

        if (!vc_ch->is_local) {
            mpi_errno = vc_ch->ckpt_pause_send_vc(vc);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        }
    }
    

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_NEM_CKPT_START);
    return mpi_errno;
fn_fail:

    goto fn_exit;
}
Пример #3
0
/*@
  MPIDI_CH3U_VC_SendClose - Initiate a close on a virtual connection
  
Input Parameters:
+ vc - Virtual connection to close
- i  - rank of virtual connection within a process group (used for debugging)

  Notes:
  The current state of this connection must be either 'MPIDI_VC_STATE_ACTIVE' 
  or 'MPIDI_VC_STATE_REMOTE_CLOSE'.  
  @*/
int MPIDI_CH3U_VC_SendClose( MPIDI_VC_t *vc, int rank )
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_close_t * close_pkt = &upkt.close;
    MPIR_Request * sreq;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);

    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);

    MPIR_Assert( vc->state == MPIDI_VC_STATE_ACTIVE ||
		 vc->state == MPIDI_VC_STATE_REMOTE_CLOSE );

    MPIDI_Pkt_init(close_pkt, MPIDI_CH3_PKT_CLOSE);
    close_pkt->ack = (vc->state == MPIDI_VC_STATE_ACTIVE) ? FALSE : TRUE;
    
    /* MT: this is not thread safe, the POBJ CS is scoped to the vc and
     * doesn't protect this global correctly */
    MPIDI_Outstanding_close_ops += 1;
    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,(MPL_DBG_FDEST,
		  "sending close(%s) on vc (pg=%p) %p to rank %d, ops = %d", 
		  close_pkt->ack ? "TRUE" : "FALSE", vc->pg, vc, 
		  rank, MPIDI_Outstanding_close_ops));
		    

    /*
     * A close packet acknowledging this close request could be
     * received during iStartMsg, therefore the state must
     * be changed before the close packet is sent.
     */
    if (vc->state == MPIDI_VC_STATE_ACTIVE) {
        MPIDI_CHANGE_VC_STATE(vc, LOCAL_CLOSE);
    }
    else {
	MPIR_Assert( vc->state == MPIDI_VC_STATE_REMOTE_CLOSE );
        MPIDI_CHANGE_VC_STATE(vc, CLOSE_ACKED);
    }
		
    mpi_errno = MPIDI_CH3_iStartMsg(vc, close_pkt, sizeof(*close_pkt), &sreq);
    MPIR_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|send_close_ack");
    
    if (sreq != NULL) {
	/* There is still another reference being held by the channel.  It
	   will not be released until the pkt is actually sent. */
	MPIR_Request_free(sreq);
    }

 fn_exit:
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_CH3U_VC_SENDCLOSE);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Пример #4
0
int MPIDI_CH3_iStartRndvTransfer(MPIDI_VC_t * vc, MPID_Request * rreq)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_clr_to_send_t *cts_pkt = &upkt.rndv_clr_to_send;
    MPID_Request *cts_req;
    MPID_Seqnum_t seqnum;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_ISTARTRNDVTRANSFER);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_ISTARTRNDVTRANSFER);

#ifdef CKPT
    MPIDI_CH3I_CR_lock();
#endif
        
    MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
    if (rreq->dev.iov_count == 1 && rreq->dev.OnDataAvail == NULL)
	cts_pkt->recv_sz = rreq->dev.iov[0].MPID_IOV_LEN;
    else
	cts_pkt->recv_sz = rreq->dev.segment_size;
    
    cts_pkt->sender_req_id = rreq->dev.sender_req_id;
    cts_pkt->receiver_req_id = rreq->handle;
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(cts_pkt, seqnum);    

    mpi_errno = MPIDI_CH3_Prepare_rndv_cts(vc, cts_pkt, rreq);
    if (mpi_errno != MPI_SUCCESS) {
	mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL,
					 FCNAME, __LINE__,
					 MPI_ERR_OTHER, "**ch3|ctspkt", 0);
	goto fn_exit;
    }

    mpi_errno = MPIDI_CH3_iStartMsg(vc, cts_pkt, sizeof(*cts_pkt), &cts_req);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        mpi_errno =
            MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL,
                                 FCNAME, __LINE__,
                                 MPI_ERR_OTHER, "**ch3|ctspkt", 0);
        goto fn_exit;
    }
    /* --END ERROR HANDLING-- */
    if (cts_req != NULL) {
        MPID_Request_release(cts_req);
    }

  fn_exit:
#ifdef CKPT
    MPIDI_CH3I_CR_unlock();
#endif
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_ISTARTRNDVTRANSFER);
    return mpi_errno;
}
Пример #5
0
/* 
 * These routines are called when a receive matches an eager sync send 
 */
int MPIDI_CH3_EagerSyncAck( MPIDI_VC_t *vc, MPIR_Request *rreq )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_eager_sync_ack_t * const esa_pkt = &upkt.eager_sync_ack;
    MPIR_Request * esa_req;
    
    MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending eager sync ack");
    MPIDI_Pkt_init(esa_pkt, MPIDI_CH3_PKT_EAGER_SYNC_ACK);
    esa_pkt->sender_req_id = rreq->dev.sender_req_id;
    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, esa_pkt, sizeof(*esa_pkt), &esa_req);
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    if (mpi_errno != MPI_SUCCESS) {
	MPIR_ERR_POP(mpi_errno);
    }
    if (esa_req != NULL)
    {
	MPIR_Request_free(esa_req);
    }
 fn_fail:
    return mpi_errno;
}
Пример #6
0
int MPID_Send(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank,
	      int tag, MPID_Comm * comm, int context_offset,
	      MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int eager_threshold = -1;
    int mpi_errno = MPI_SUCCESS;    
#if defined(FINEGRAIN_MPI)
    int destpid=-1, destworldrank=-1;
#endif
    MPIDI_STATE_DECL(MPID_STATE_MPID_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                "rank=%d, tag=%d, context=%d", 
		rank, tag, comm->context_id + context_offset));

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked &&
            MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) &&
            MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }

#if defined(FINEGRAIN_MPI)
    MPIDI_Comm_get_pid_worldrank(comm, rank, &destpid, &destworldrank);

    if (COMPARE_RANKS(rank,comm,destpid) && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(&buf, count, datatype, rank, tag, comm,
				     context_offset, MPIDI_REQUEST_TYPE_SEND,
				     &sreq);
        if (rank == comm->rank)
	{
            printf("my_fgrank=%d: %s, self send DEADLOCK\n", my_fgrank, __FUNCTION__);
	    if (sreq != NULL && sreq->cc != 0) {
		MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
				    "**dev|selfsenddeadlock");
	    }
	}

#else
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, 
				     context_offset, MPIDI_REQUEST_TYPE_SEND, 
				     &sreq);

	/* In the single threaded case, sending to yourself will cause 
	   deadlock.  Note that in the runtime-thread case, this check
	   will not be made (long-term FIXME) */
#       ifndef MPICH_IS_THREADED
	{
	    if (sreq != NULL && MPID_cc_get(sreq->cc) != 0) {
		MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
				    "**dev|selfsenddeadlock");
	    }
	}
#	endif
#endif
	if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); }
	goto fn_exit;
    }

    if (rank == MPI_PROC_NULL)
    {
	goto fn_exit;
    }

#if defined(FINEGRAIN_MPI)
    MPIDI_Comm_get_vc_set_active_direct(comm, destpid, &vc);
#else
    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
#endif
    MPIR_ERR_CHKANDJUMP1(vc->state == MPIDI_VC_STATE_MORIBUND, mpi_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", rank);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->send)
    {
	mpi_errno = vc->comm_ops->send( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
	goto fn_exit;
    }
#endif

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, 
			    dt_true_lb);


    if (data_sz == 0)
    {
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
	MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
#if defined(FINEGRAIN_MPI)
        eager_pkt->match.parts.dest_rank = destworldrank;
#endif
	eager_pkt->match.parts.rank = comm->rank;
	eager_pkt->match.parts.tag = tag;
	eager_pkt->match.parts.context_id = comm->context_id + context_offset;
	eager_pkt->sender_req_id = MPI_REQUEST_NULL;
	eager_pkt->data_sz = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);
	
	MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
	mpi_errno = MPIDI_CH3_iStartMsg(vc, eager_pkt, sizeof(*eager_pkt), &sreq);
	MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg");
	}
	/* --END ERROR HANDLING-- */
	if (sreq != NULL)
	{
	    MPIDI_Request_set_seqnum(sreq, seqnum);
	    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
	    /* sreq->comm = comm;
	      MPIR_Comm_add_ref(comm); -- not necessary for blocking functions */
	}
	
	goto fn_exit;
    }

    MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc);

    /* FIXME: flow control: limit number of outstanding eager messages
       containing data and need to be buffered by the receiver */
#ifdef USE_EAGER_SHORT
    if (dt_contig && data_sz <= MPIDI_EAGER_SHORT_SIZE) {
	mpi_errno = MPIDI_CH3_EagerContigShortSend( &sreq, 
					       MPIDI_CH3_PKT_EAGERSHORT_SEND,
					       (char *)buf + dt_true_lb,
					       data_sz, rank, tag, comm, 
					       context_offset );
    }
    else
#endif

    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold)
    {
	if (dt_contig)
        {
 	    mpi_errno = MPIDI_CH3_EagerContigSend( &sreq, 
						   MPIDI_CH3_PKT_EAGER_SEND,
						   (char *)buf + dt_true_lb,
						   data_sz, rank, tag, comm, 
						   context_offset );
	}
	else
        {
	    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
	    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
	    mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, 
                                                      MPIDI_CH3_PKT_EAGER_SEND,
                                                      buf, count, datatype,
                                                      data_sz, rank, tag, 
                                                      comm, context_offset );
	}
    }
    else
    {
Пример #7
0
static int MPIDI_CH3_SMP_Rendezvous_push(MPIDI_VC_t * vc,
                                                MPID_Request * sreq)
{
    int nb;
    int complete = 0;
    int seqnum;
    int mpi_errno;
    MPIDI_CH3_Pkt_rndv_r3_data_t pkt_head;
    MPID_Request * send_req;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);

    MPIDI_Pkt_init(&pkt_head, MPIDI_CH3_PKT_RNDV_R3_DATA);
    pkt_head.receiver_req_id = sreq->mrail.partner_id;
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(&pkt_head, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);
#if defined(_SMP_LIMIC_)
    /* Use limic2 for contiguous data 
     * Use shared memory for non-contiguous data
     */
    if (!g_smp_use_limic2 ||
        sreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_SendReloadIOV ||
        sreq->dev.iov_count > 1) {
        pkt_head.send_req_id = NULL;
    } else {
        pkt_head.send_req_id = sreq;
    }
#endif

    mpi_errno = MPIDI_CH3_iStartMsg(vc, &pkt_head,
                                    sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t),
                                    &send_req);

    if (mpi_errno != MPI_SUCCESS) {
         MPIU_Object_set_ref(sreq, 0);
         MPIDI_CH3_Request_destroy(sreq);
         sreq = NULL;
         mpi_errno =
             MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME,
                              __LINE__, MPI_ERR_OTHER, "**ch3|rtspkt",
                              0);
        return mpi_errno;
    }
    /* --END ERROR HANDLING-- */
    if (send_req != NULL) {
        DEBUG_PRINT("r3 packet not sent \n");
        MPID_Request_release(send_req);
    }
 
#if defined(_SMP_LIMIC_)
      if (pkt_head.send_req_id) {
        sreq->mrail.nearly_complete = 1;
        return MPI_SUCCESS;
    }
#endif

    vc->smp.send_current_pkt_type = SMP_RNDV_MSG;

    DEBUG_PRINT("r3 sent req is %p\n", sreq);
    if (MPIDI_CH3I_SMP_SendQ_empty(vc)) {
        for (;;) {
            DEBUG_PRINT("iov count (sreq): %d, offset %d, len[1] %d\n",
                        sreq->dev.iov_count, sreq->dev.iov_offset,
                        sreq->dev.iov[0].MPID_IOV_LEN);

            if (vc->smp.send_current_pkt_type == SMP_RNDV_MSG) {
                mpi_errno = MPIDI_CH3I_SMP_writev_rndv_data(vc, 
                                &sreq->dev.iov[sreq->dev.iov_offset], 
                                sreq->dev.iov_count - sreq->dev.iov_offset,
                                &nb);
            } else {
                MPIU_Assert(vc->smp.send_current_pkt_type == SMP_RNDV_MSG_CONT);
                MPIDI_CH3I_SMP_writev_rndv_data_cont(vc,
                                &sreq->dev.iov[sreq->dev.iov_offset],
                                sreq->dev.iov_count - sreq->dev.iov_offset,
                                &nb);
            }

            if (MPI_SUCCESS != mpi_errno) {
                vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
                sreq->status.MPI_ERROR = MPI_ERR_INTERN;
                MPIDI_CH3U_Request_complete(sreq);
                return mpi_errno;
            }

            if (nb >= 0) {
                if (MPIDI_CH3I_Request_adjust_iov(sreq, nb)) {
                    MPIDI_CH3U_Handle_send_req(vc, sreq, &complete);
                    if (complete) {
                        sreq->mrail.nearly_complete = 1;
                        break;
                    } else {
                        vc->smp.send_current_pkt_type = SMP_RNDV_MSG_CONT;
                    }
                } else {
                    sreq->ch.reqtype = REQUEST_RNDV_R3_DATA;
                    MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq);
                    vc->smp.send_active = sreq;
                    sreq->mrail.nearly_complete = 1;
                    vc->smp.send_current_pkt_type = SMP_RNDV_MSG_CONT;
                    break;
                }
            } else {
                MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq);
                vc->smp.send_active = sreq;
                sreq->mrail.nearly_complete = 1;
                break;
            }
        }
    } else {
        sreq->ch.reqtype = REQUEST_RNDV_R3_DATA;
        MPIDI_CH3I_SMP_SendQ_enqueue(vc, sreq);
        sreq->mrail.nearly_complete = 1;
        vc->smp.send_current_pkt_type = SMP_RNDV_MSG;
        DEBUG_PRINT("Enqueue sreq %p", sreq);
    }

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SMP_RNDV_PUSH);
    return MPI_SUCCESS;
}
Пример #8
0
/* MPIDI_CH3_RndvSend - Send a request to perform a rendezvous send */
int MPIDI_CH3_RndvSend( MPIR_Request **sreq_p, const void * buf, MPI_Aint count,
			MPI_Datatype datatype, int dt_contig, intptr_t data_sz,
			MPI_Aint dt_true_lb,
			int rank, 
			int tag, MPIR_Comm * comm, int context_offset )
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_rndv_req_to_send_t * const rts_pkt = &upkt.rndv_req_to_send;
    MPIDI_VC_t * vc;
    MPIR_Request * rts_sreq;
    MPIR_Request *sreq =*sreq_p;
    int          mpi_errno = MPI_SUCCESS;
	
    MPL_DBG_MSG_D(MPIDI_CH3_DBG_OTHER,VERBOSE,
		   "sending rndv RTS, data_sz=%" PRIdPTR, data_sz);

    sreq->dev.OnDataAvail = 0;
    
    sreq->dev.partner_request = NULL;
	
    MPIDI_Pkt_init(rts_pkt, MPIDI_CH3_PKT_RNDV_REQ_TO_SEND);
    rts_pkt->match.parts.rank	      = comm->rank;
    rts_pkt->match.parts.tag	      = tag;
    rts_pkt->match.parts.context_id = comm->context_id + context_offset;
    rts_pkt->sender_req_id    = sreq->handle;
    rts_pkt->data_sz	      = data_sz;

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(rts_pkt, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);

    MPL_DBG_MSGPKT(vc,tag,rts_pkt->match.parts.context_id,rank,data_sz,"Rndv");

    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, rts_pkt, sizeof(*rts_pkt), &rts_sreq);
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS)
    {
        MPIR_Request_free(sreq);
	*sreq_p = NULL;
        MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rtspkt");
    }
    /* --END ERROR HANDLING-- */
    if (rts_sreq != NULL)
    {
	if (rts_sreq->status.MPI_ERROR != MPI_SUCCESS)
	{
            MPIR_Request_free(sreq);
	    *sreq_p = NULL;
            mpi_errno = rts_sreq->status.MPI_ERROR;
            MPIR_Request_free(rts_sreq);
            MPIR_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rtspkt");
	}
	MPIR_Request_free(rts_sreq);
    }

    /* FIXME: fill temporary IOV or pack temporary buffer after send to hide 
       some latency.  This requires synchronization
       because the CTS packet could arrive and be processed before the above 
       iStartmsg completes (depending on the progress
       engine, threads, etc.). */

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Пример #9
0
int MPID_Send(const void * buf, int count, MPI_Datatype datatype, int rank, 
	      int tag, MPID_Comm * comm, int context_offset,
	      MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int mpi_errno = MPI_SUCCESS;    
    MPIDI_STATE_DECL(MPID_STATE_MPID_SEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                "rank=%d, tag=%d, context=%d", 
		rank, tag, comm->context_id + context_offset));

    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, 
				     context_offset, MPIDI_REQUEST_TYPE_SEND, 
				     &sreq);

	/* In the single threaded case, sending to yourself will cause 
	   deadlock.  Note that in the runtime-thread case, this check
	   will not be made (long-term FIXME) */
#       ifndef MPICH_IS_THREADED
	{
	    if (sreq != NULL && sreq->cc != 0) {
		MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
				    "**dev|selfsenddeadlock");
	    }
	}
#	endif
	if (mpi_errno != MPI_SUCCESS) { MPIU_ERR_POP(mpi_errno); }
	goto fn_exit;
    }

    if (rank == MPI_PROC_NULL)
    {
	goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->send)
    {
	mpi_errno = vc->comm_ops->send( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
	goto fn_exit;
    }
#endif

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, 
			    dt_true_lb);


    if (data_sz == 0)
    {
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
	MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
	eager_pkt->match.parts.rank = comm->rank;
	eager_pkt->match.parts.tag = tag;
	eager_pkt->match.parts.context_id = comm->context_id + context_offset;
	eager_pkt->sender_req_id = MPI_REQUEST_NULL;
	eager_pkt->data_sz = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);
	
	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = MPIDI_CH3_iStartMsg(vc, eager_pkt, sizeof(*eager_pkt), &sreq);
	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg");
	}
	/* --END ERROR HANDLING-- */
	if (sreq != NULL)
	{
	    MPIDI_Request_set_seqnum(sreq, seqnum);
	    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
	    /* sreq->comm = comm;
	      MPIR_Comm_add_ref(comm); -- not necessary for blocking functions */
	}
	
	goto fn_exit;
    }
    
    /* FIXME: flow control: limit number of outstanding eager messsages 
       containing data and need to be buffered by the receiver */
#ifdef USE_EAGER_SHORT
    if (dt_contig && data_sz <= MPIDI_EAGER_SHORT_SIZE) {
	mpi_errno = MPIDI_CH3_EagerContigShortSend( &sreq, 
					       MPIDI_CH3_PKT_EAGERSHORT_SEND,
					       (char *)buf + dt_true_lb,
					       data_sz, rank, tag, comm, 
					       context_offset );
    }
    else
#endif
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <=	
	vc->eager_max_msg_sz) {
	if (dt_contig) {
 	    mpi_errno = MPIDI_CH3_EagerContigSend( &sreq, 
						   MPIDI_CH3_PKT_EAGER_SEND,
						   (char *)buf + dt_true_lb,
						   data_sz, rank, tag, comm, 
						   context_offset );
	}
	else {
	    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
	    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
	    mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, 
                                                      MPIDI_CH3_PKT_EAGER_SEND,
                                                      buf, count, datatype,
                                                      data_sz, rank, tag, 
                                                      comm, context_offset );
	}
    }
    else {
Пример #10
0
int MPIDI_CH3_Connect_to_root(const char* port_name, MPIDI_VC_t** new_vc)
{
    int mpi_errno = MPI_SUCCESS;
    int str_errno;
    char ifname[MAX_HOST_DESCRIPTION_LEN];
    MPIDI_VC_t *vc;
    MPIDI_CH3_Pkt_cm_establish_t pkt;
    MPID_Request * sreq;
    int seqnum;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);

    *new_vc = NULL;
    if (!MPIDI_CH3I_Process.has_dpm)
        return MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, 
                                    __LINE__, MPI_ERR_OTHER, "**notimpl", 0);

    str_errno = MPIU_Str_get_string_arg(port_name, 
                                        MPIDI_CH3I_HOST_DESCRIPTION_KEY, 
                                        ifname, MAX_HOST_DESCRIPTION_LEN);
    if (str_errno != MPIU_STR_SUCCESS) {
        /* --BEGIN ERROR HANDLING */
        if (str_errno == MPIU_STR_FAIL) {
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**argstr_missinghost");
        }
        else {
            /* MPIU_STR_TRUNCATED or MPIU_STR_NONEM */
            MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**argstr_hostd");
        }
        /* --END ERROR HANDLING-- */
    }

    vc = MPIU_Malloc(sizeof(MPIDI_VC_t));
    if (!vc) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**nomem");
    }
    MPIDI_VC_Init(vc, NULL, 0);

    mpi_errno = MPIDI_CH3I_CM_Connect_raw_vc(vc, ifname);
    if (mpi_errno) {
        MPIU_ERR_POP(mpi_errno);
    }
    
    while (vc->ch.state != MPIDI_CH3I_VC_STATE_IDLE) {
        mpi_errno = MPID_Progress_test();
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            MPIU_ERR_POP(mpi_errno);
        }
    }

    /* fprintf(stderr, "[###] vc state to idel, now send cm_establish msg\n") */
    /* Now a connection is created, send a cm_establish message */
    /* FIXME: vc->mrail.remote_vc_addr is used to find remote vc
     * A more elegant way is needed */
    MPIDI_Pkt_init(&pkt, MPIDI_CH3_PKT_CM_ESTABLISH);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(&pkt, seqnum);
    pkt.vc_addr = vc->mrail.remote_vc_addr;
    mpi_errno = MPIDI_GetTagFromPort(port_name, &pkt.port_name_tag);
    if (mpi_errno != MPIU_STR_SUCCESS) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER, "**argstr_port_name_tag");
    }

    mpi_errno = MPIDI_CH3_iStartMsg(vc, &pkt, sizeof(pkt), &sreq);
    if (mpi_errno != MPI_SUCCESS)
    {
        MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**fail", "**fail %s",
                             "Failed to send cm establish message");
    }

    if (sreq != NULL)
    {
        if (sreq->status.MPI_ERROR != MPI_SUCCESS)
        {
            mpi_errno = MPIR_Err_create_code(sreq->status.MPI_ERROR,
                                        MPIR_ERR_FATAL, FCNAME, __LINE__, 
                                        MPI_ERR_OTHER,
                                        "**fail", 0);
            MPID_Request_release(sreq);
            goto fn_fail;
        }
        MPID_Request_release(sreq);
    }

    *new_vc = vc;

fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_CONNECT_TO_ROOT);

    return mpi_errno;
}
Пример #11
0
int MPID_Rsend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset,
	       MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int mpi_errno = MPI_SUCCESS;    
    MPIDI_STATE_DECL(MPID_STATE_MPID_RSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_RSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
					"rank=%d, tag=%d, context=%d", 
                              rank, tag, comm->context_id + context_offset));

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked &&
            MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) &&
            MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }
    
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_RSEND, &sreq);
	goto fn_exit;
    }

    if (rank == MPI_PROC_NULL)
    {
	goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->rsend)
    {
	mpi_errno = vc->comm_ops->rsend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
	goto fn_exit;
    }
#endif

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    if (data_sz == 0)
    {
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_ready_send_t * const ready_pkt = &upkt.ready_send;

	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
    
	MPIDI_Pkt_init(ready_pkt, MPIDI_CH3_PKT_READY_SEND);
	ready_pkt->match.parts.rank = comm->rank;
	ready_pkt->match.parts.tag = tag;
	ready_pkt->match.parts.context_id = comm->context_id + context_offset;
	ready_pkt->sender_req_id = MPI_REQUEST_NULL;
	ready_pkt->data_sz = data_sz;

	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(ready_pkt, seqnum);
	
	MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
	mpi_errno = MPIDI_CH3_iStartMsg(vc, ready_pkt, sizeof(*ready_pkt), &sreq);
	MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**ch3|eagermsg", 0);
	    goto fn_exit;
	}
	/* --END ERROR HANDLING-- */
	if (sreq != NULL)
	{
	    MPIDI_Request_set_seqnum(sreq, seqnum);
	    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_RSEND);
	    /* sreq->comm = comm;
	       MPIR_Comm_add_ref(comm); -- not needed for blocking operations */
	}

	goto fn_exit;
    }
    
    if (vc->ready_eager_max_msg_sz < 0 || data_sz + sizeof(MPIDI_CH3_Pkt_ready_send_t) <= vc->ready_eager_max_msg_sz) {
        if (dt_contig)
        {
            mpi_errno = MPIDI_CH3_EagerContigSend( &sreq,
                                                   MPIDI_CH3_PKT_READY_SEND,
                                                   (char *)buf + dt_true_lb,
                                                   data_sz, rank, tag, comm,
                                                   context_offset );
        }
        else
        {
            MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
            MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
            mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq,
                                                      MPIDI_CH3_PKT_READY_SEND,
                                                      buf, count, datatype,
                                                      data_sz, rank, tag,
                                                      comm, context_offset );
        }
    } else {
Пример #12
0
int MPIDI_CH3_PktHandler_Close( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, 
				intptr_t *buflen, MPIR_Request **rreqp )
{
    MPIDI_CH3_Pkt_close_t * close_pkt = &pkt->close;
    int mpi_errno = MPI_SUCCESS;
	    
    if (vc->state == MPIDI_VC_STATE_LOCAL_CLOSE)
    {
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_close_t * resp_pkt = &upkt.close;
	MPIR_Request * resp_sreq;
	
	MPIDI_Pkt_init(resp_pkt, MPIDI_CH3_PKT_CLOSE);
	resp_pkt->ack = TRUE;
	
	MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,"sending close(TRUE) to %d",
		       vc->pg_rank);
	mpi_errno = MPIDI_CH3_iStartMsg(vc, resp_pkt, sizeof(*resp_pkt), &resp_sreq);
        MPIR_ERR_CHKANDJUMP(mpi_errno, mpi_errno, MPI_ERR_OTHER, "**ch3|send_close_ack");
	
	if (resp_sreq != NULL)
	{
	    /* There is still another reference being held by the channel.  It
	       will not be released until the pkt is actually sent. */
	    MPIR_Request_free(resp_sreq);
	}
    }
    
    if (close_pkt->ack == FALSE)
    {
	if (vc->state == MPIDI_VC_STATE_LOCAL_CLOSE)
	{
	    MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,
		   "received close(FALSE) from %d, moving to CLOSE_ACKED.",
		   vc->pg_rank);
            MPIDI_CHANGE_VC_STATE(vc, CLOSE_ACKED);
	}
	else /* (vc->state == MPIDI_VC_STATE_ACTIVE) */
        {
	    if (vc->state != MPIDI_VC_STATE_ACTIVE)
            {
		MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_DISCONNECT, TYPICAL, (MPL_DBG_FDEST, "Unexpected state %s in vc %p (rank=%d) (expecting MPIDI_VC_STATE_ACTIVE)\n", MPIDI_VC_GetStateString(vc->state), vc, vc->pg_rank ));
	    MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,
                     "received close(FALSE) from %d, moving to REMOTE_CLOSE.",
				   vc->pg_rank);
            }
	    MPIR_Assert(vc->state == MPIDI_VC_STATE_ACTIVE);
            MPIDI_CHANGE_VC_STATE(vc, REMOTE_CLOSE);
	}
    }
    else /* (close_pkt->ack == TRUE) */
    {
	MPL_DBG_MSG_D(MPIDI_CH3_DBG_DISCONNECT,TYPICAL,
                       "received close(TRUE) from %d, moving to CLOSED.", 
			       vc->pg_rank);
	MPIR_Assert (vc->state == MPIDI_VC_STATE_LOCAL_CLOSE ||
		     vc->state == MPIDI_VC_STATE_CLOSE_ACKED);
        MPIDI_CHANGE_VC_STATE(vc, CLOSED);
	/* For example, with sockets, Connection_terminate will close
	   the socket */
	mpi_errno = MPIDI_CH3_Connection_terminate(vc);
    }
    
    *buflen = sizeof(MPIDI_CH3_Pkt_t);
    *rreqp = NULL;

 fn_fail:
    return mpi_errno;
}
Пример #13
0
int MPIDI_CH3_PktHandler_RndvReqToSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
					MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPID_Request * rreq;
    int found;
    MPIDI_CH3_Pkt_rndv_req_to_send_t * rts_pkt = &pkt->rndv_req_to_send;
    int mpi_errno = MPI_SUCCESS;
    
    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
 "received rndv RTS pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d, data_sz=" MPIDI_MSG_SZ_FMT,
	      rts_pkt->sender_req_id, rts_pkt->match.parts.rank, 
					rts_pkt->match.parts.tag, 
              rts_pkt->match.parts.context_id, rts_pkt->data_sz));
    MPIU_DBG_MSGPKT(vc,rts_pkt->match.parts.tag,rts_pkt->match.parts.context_id,
		    rts_pkt->match.parts.rank,rts_pkt->data_sz,
		    "ReceivedRndv");

    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_MSGQ_MUTEX);
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&rts_pkt->match, &found);
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this. */
    if (!found && MPID_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }
    
    set_request_info(rreq, rts_pkt, MPIDI_REQUEST_RNDV_MSG);

    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_MSGQ_MUTEX);

    *buflen = sizeof(MPIDI_CH3_Pkt_t);
    
    if (found)
    {
	MPID_Request * cts_req;
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_rndv_clr_to_send_t * cts_pkt = &upkt.rndv_clr_to_send;
	
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"posted request found");
	
	/* FIXME: What if the receive user buffer is not big enough to
	   hold the data about to be cleared for sending? */
	
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending rndv CTS packet");
	MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RNDV_CLR_TO_SEND);
	cts_pkt->sender_req_id = rts_pkt->sender_req_id;
	cts_pkt->receiver_req_id = rreq->handle;
        MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
	mpi_errno = MPIDI_CH3_iStartMsg(vc, cts_pkt, sizeof(*cts_pkt), &cts_req);
        MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
				"**ch3|ctspkt");
	}
	if (cts_req != NULL) {
	    MPID_Request_release(cts_req);
	}
    }
    else
    {
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"unexpected request allocated");
	
	/*
	 * A MPID_Probe() may be waiting for the request we just 
	 * inserted, so we need to tell the progress engine to exit.
	 *
	 * FIXME: This will cause MPID_Progress_wait() to return to the
	 * MPI layer each time an unexpected RTS packet is
	 * received.  MPID_Probe() should atomically increment a
	 * counter and MPIDI_CH3_Progress_signal_completion()
	 * should only be called if that counter is greater than zero.
	 */
	MPIDI_CH3_Progress_signal_completion();
    }
    
    *rreqp = NULL;

 fn_fail:
    return mpi_errno;
}
Пример #14
0
int MPID_Cancel_send(MPIR_Request * sreq)
{
    MPIDI_VC_t * vc;
    int proto;
    int flag;
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_CANCEL_SEND);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_CANCEL_SEND);
    
    MPIR_Assert(sreq->kind == MPIR_REQUEST_KIND__SEND);

    MPIDI_Request_cancel_pending(sreq, &flag);
    if (flag)
    {
	goto fn_exit;
    }

    /*
     * FIXME: user requests returned by MPI_Ibsend() have a NULL comm pointer
     * and no pointer to the underlying communication
     * request.  For now, we simply fail to cancel the request.  In the future,
     * we should add a new request kind to indicate that
     * the request is a BSEND.  Then we can properly cancel the request, much 
     * in the way we do persistent requests.
     */
    if (sreq->comm == NULL)
    {
	goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(sreq->comm, sreq->dev.match.parts.rank, &vc);

    proto = MPIDI_Request_get_msg_type(sreq);

    if (proto == MPIDI_REQUEST_SELF_MSG)
    {
	MPIR_Request * rreq;
	
	MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,
		     "attempting to cancel message sent to self");
	
	MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
	rreq = MPIDI_CH3U_Recvq_FDU(sreq->handle, &sreq->dev.match);
	MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_MSGQ_MUTEX);
	if (rreq)
	{
	    MPIR_Assert(rreq->dev.partner_request == sreq);
	    
	    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
             "send-to-self cancellation successful, sreq=0x%08x, rreq=0x%08x",
						sreq->handle, rreq->handle));

            /* Pull the message out of the unexpected queue since it's
             * being cancelled.  The below request release drops one
             * reference.  We explicitly drop a second reference,
             * because the receive request will never be visible to
             * the user. */
            MPIR_Request_free(rreq);
            MPIR_Request_free(rreq);

	    MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);
            mpi_errno = MPID_Request_complete(sreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
	}
	else
	{
	    MPIR_STATUS_SET_CANCEL_BIT(sreq->status, FALSE);
	    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
               "send-to-self cancellation failed, sreq=0x%08x, rreq=0x%08x",
						sreq->handle, rreq->handle));
	}
	
	goto fn_exit;
    }

    /* If the message went over a netmod and it provides a cancel_send
       function, call it here. */
#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->cancel_send)
    {
        mpi_errno = vc->comm_ops->cancel_send(vc, sreq);
        goto fn_exit;
    }
#endif

    /* Check to see if the send is still in the send queue.  If so, remove it, 
       mark the request and cancelled and complete, and
       release the device's reference to the request object.  
    */
    {
	int cancelled;
	
	if (proto == MPIDI_REQUEST_RNDV_MSG)
	{
	    MPIR_Request * rts_sreq;
	    /* The cancellation of the RTS request needs to be atomic through 
	       the destruction of the RTS request to avoid
               conflict with release of the RTS request if the CTS is received
	       (see handling of a rendezvous CTS packet in
               MPIDI_CH3U_Handle_recv_pkt()).  
	       MPID_Request_fetch_and_clear_rts_sreq() is used to gurantee 
	       that atomicity. */
	    MPIDI_Request_fetch_and_clear_rts_sreq(sreq, &rts_sreq);
	    if (rts_sreq != NULL) 
	    {
		cancelled = FALSE;
		
		/* since we attempted to cancel a RTS request, then we are 
		   responsible for releasing that request */
		MPIR_Request_free(rts_sreq);

		/* --BEGIN ERROR HANDLING-- */
		if (mpi_errno != MPI_SUCCESS)
		{
		    mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_OTHER,
						     "**ch3|cancelrndv", 0);
		    goto fn_exit;
		}
		/* --END ERROR HANDLING-- */
		
		if (cancelled)
		{
		    MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);
		    /* no other thread should be waiting on sreq, so it is 
		       safe to reset ref_count and cc */
                    MPIR_cc_set(&sreq->cc, 0);
                    /* FIXME should be a decr and assert, not a set */
		    MPIR_Object_set_ref(sreq, 1);
		    goto fn_exit;
		}
	    }
	}
	else
	{
	    cancelled = FALSE;
	    if (cancelled)
	    {
		MPIR_STATUS_SET_CANCEL_BIT(sreq->status, TRUE);
		/* no other thread should be waiting on sreq, so it is safe to 
		   reset ref_count and cc */
                MPIR_cc_set(&sreq->cc, 0);
                /* FIXME should be a decr and assert, not a set */
		MPIR_Object_set_ref(sreq, 1);
		goto fn_exit;
	    }
	}
    }

    /* Part or all of the message has already been sent, so we need to send a 
       cancellation request to the receiver in an attempt
       to catch the message before it is matched. */
    {
	int was_incomplete;
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_cancel_send_req_t * const csr_pkt = &upkt.cancel_send_req;
	MPIR_Request * csr_sreq;
	
	MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
              "sending cancel request to %d for 0x%08x", 
	      sreq->dev.match.parts.rank, sreq->handle));
	
	/* The completion counter and reference count are incremented to keep 
	   the request around long enough to receive a
	   response regardless of what the user does (free the request before 
	   waiting, etc.). */
	MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
	if (!was_incomplete)
	{
	    /* The reference count is incremented only if the request was 
	       complete before the increment. */
	    MPIR_Request_add_ref( sreq );
	}

	MPIDI_Pkt_init(csr_pkt, MPIDI_CH3_PKT_CANCEL_SEND_REQ);
	csr_pkt->match.parts.rank = sreq->comm->rank;
	csr_pkt->match.parts.tag = sreq->dev.match.parts.tag;
	csr_pkt->match.parts.context_id = sreq->dev.match.parts.context_id;
	csr_pkt->sender_req_id = sreq->handle;
	
	MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
	mpi_errno = MPIDI_CH3_iStartMsg(vc, csr_pkt, sizeof(*csr_pkt), &csr_sreq);
	MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|cancelreq");
	}
	if (csr_sreq != NULL)
	{
	    MPIR_Request_free(csr_sreq);
	}
    }
    
    /* FIXME: if send cancellation packets are allowed to arrive out-of-order 
       with respect to send packets, then we need to
       timestamp send and cancel packets to insure that a cancellation request 
       does not bypass the send packet to be cancelled
       and erroneously cancel a previously sent message with the same request 
       handle. */
    /* FIXME: A timestamp is more than is necessary; a message sequence number
       should be adequate. */
 fn_fail:
 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_CANCEL_SEND);
    return mpi_errno;
}
Пример #15
0
int MPIDI_CH3_PktHandler_EagerSyncSend( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
					intptr_t *buflen, MPIR_Request **rreqp )
{
    MPIDI_CH3_Pkt_eager_send_t * es_pkt = &pkt->eager_send;
    MPIR_Request * rreq;
    int found;
    int complete;
    char *data_buf;
    intptr_t data_len;
    int mpi_errno = MPI_SUCCESS;
    
    MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_OTHER,VERBOSE,(MPL_DBG_FDEST,
     "received eager sync send pkt, sreq=0x%08x, rank=%d, tag=%d, context=%d",
	      es_pkt->sender_req_id, es_pkt->match.parts.rank, 
	      es_pkt->match.parts.tag, 
              es_pkt->match.parts.context_id));
    MPL_DBG_MSGPKT(vc,es_pkt->match.parts.tag,es_pkt->match.parts.context_id,
		    es_pkt->match.parts.rank,es_pkt->data_sz,
		    "ReceivedEagerSync");
	    
    rreq = MPIDI_CH3U_Recvq_FDP_or_AEU(&es_pkt->match, &found);
    MPIR_ERR_CHKANDJUMP1(!rreq, mpi_errno,MPI_ERR_OTHER, "**nomemreq", "**nomemuereq %d", MPIDI_CH3U_Recvq_count_unexp());

    /* If the completion counter is 0, that means that the communicator to
     * which this message is being sent has been revoked and we shouldn't
     * bother finishing this. */
    if (!found && MPIR_cc_get(rreq->cc) == 0) {
        *rreqp = NULL;
        goto fn_fail;
    }
    
    set_request_info(rreq, es_pkt, MPIDI_REQUEST_EAGER_MSG);

    data_len = ((*buflen - sizeof(MPIDI_CH3_Pkt_t) >= rreq->dev.recv_data_sz)
                ? rreq->dev.recv_data_sz : *buflen - sizeof(MPIDI_CH3_Pkt_t));
    data_buf = (char *)pkt + sizeof(MPIDI_CH3_Pkt_t);
    
    if (found)
    {
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_eager_sync_ack_t * const esa_pkt = &upkt.eager_sync_ack;
	MPIR_Request * esa_req;

	if (rreq->dev.recv_data_sz == 0) {
            *buflen = sizeof(MPIDI_CH3_Pkt_t);
            mpi_errno = MPID_Request_complete(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
	    *rreqp = NULL;
	}
	else {
	    mpi_errno = MPIDI_CH3U_Receive_data_found( rreq, data_buf,
                                                       &data_len, &complete );
	    if (mpi_errno != MPI_SUCCESS) {
		MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
		    "**ch3|postrecv %s", "MPIDI_CH3_PKT_EAGER_SYNC_SEND");
	    }

            *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

            if (complete) 
            {
                mpi_errno = MPID_Request_complete(rreq);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIR_ERR_POP(mpi_errno);
                }
                *rreqp = NULL;
            }
            else
            {
                *rreqp = rreq;
            }
	}
	MPL_DBG_MSG(MPIDI_CH3_DBG_OTHER,VERBOSE,"sending eager sync ack");
	
	MPIDI_Pkt_init(esa_pkt, MPIDI_CH3_PKT_EAGER_SYNC_ACK);
	esa_pkt->sender_req_id = rreq->dev.sender_req_id;
	/* Because this is a packet handler, it is already within a CH3 CS */
	/* MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex); */
	mpi_errno = MPIDI_CH3_iStartMsg(vc, esa_pkt, sizeof(*esa_pkt), &esa_req);
	/* MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex); */
	if (mpi_errno != MPI_SUCCESS) {
	    MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,
				"**ch3|syncack");
	}
	if (esa_req != NULL) {
	    MPIR_Request_free(esa_req);
	}
    }
    else
    {
	if (rreq->dev.recv_data_sz == 0) {
            *buflen = sizeof(MPIDI_CH3_Pkt_t);
            mpi_errno = MPID_Request_complete(rreq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
	    *rreqp = NULL;
	}
	else {
	    mpi_errno = MPIDI_CH3U_Receive_data_unexpected( rreq, data_buf,
                                                            &data_len, &complete );
	    if (mpi_errno != MPI_SUCCESS) {
		MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**ch3|postrecv",
		    "**ch3|postrecv %s", "MPIDI_CH3_PKT_EAGER_SYNC_SEND");
	    }

            *buflen = sizeof(MPIDI_CH3_Pkt_t) + data_len;

            if (complete) 
            {
                mpi_errno = MPID_Request_complete(rreq);
                if (mpi_errno != MPI_SUCCESS) {
                    MPIR_ERR_POP(mpi_errno);
                }
                *rreqp = NULL;
            }
            else
            {
                *rreqp = rreq;
            }
	}
	MPIDI_Request_set_sync_send_flag(rreq, TRUE);
    }
 fn_fail:
    return mpi_errno;
}
Пример #16
0
int MPIDI_CH3_EagerContigShortSend( MPID_Request **sreq_p, 
				    MPIDI_CH3_Pkt_type_t reqtype, 
				    const void * buf, MPIDI_msg_sz_t data_sz, int rank, 
				    int tag, MPID_Comm * comm, 
				    int context_offset )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_VC_t * vc;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_eagershort_send_t * const eagershort_pkt = 
	&upkt.eagershort_send;
    MPID_Request *sreq = *sreq_p;
    
    /*    printf( "Sending short eager\n"); fflush(stdout); */
    MPIDI_Pkt_init(eagershort_pkt, reqtype);
    eagershort_pkt->match.parts.rank	     = comm->rank;
    eagershort_pkt->match.parts.tag	     = tag;
    eagershort_pkt->match.parts.context_id = comm->context_id + context_offset;
    eagershort_pkt->data_sz	     = data_sz;
    
    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
       "sending contiguous short eager message, data_sz=" MPIDI_MSG_SZ_FMT,
					data_sz));
	    
    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Pkt_set_seqnum(eagershort_pkt, seqnum);

    /* Copy the payload. We could optimize this if data_sz & 0x3 == 0 
       (copy (data_sz >> 2) ints, inline that since data size is 
       currently limited to 4 ints */
    {
	unsigned char * restrict p = 
	    (unsigned char *)eagershort_pkt->data;
	unsigned char const * restrict bufp = (unsigned char *)buf;
	int i;
	for (i=0; i<data_sz; i++) {
	    *p++ = *bufp++;
	}
    }

    MPIU_DBG_MSGPKT(vc,tag,eagershort_pkt->match.parts.context_id,rank,data_sz,
		    "EagerShort");
    MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, eagershort_pkt, sizeof(*eagershort_pkt), sreq_p);
    MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
    if (mpi_errno != MPI_SUCCESS) {
	MPIR_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|eagermsg");
    }
    sreq = *sreq_p;
    if (sreq != NULL) {
	/*printf( "Surprise, did not complete send of eagershort (starting connection?)\n" ); 
	  fflush(stdout); */
        /* MT FIXME setting fields in the request after it has been given to the
         * progress engine is racy.  The start call above is protected by
         * vc CS, but the progress engine is protected by MPIDCOMM.  So
         * we can't just extend the CS type below this point... what's the fix? */
	MPIDI_Request_set_seqnum(sreq, seqnum);
	MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
    }

 fn_fail:    
    return mpi_errno;
}