Beispiel #1
0
static int MPIR_Bsend_check_active( void )
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Bsend_data_t *active = BsendBuffer.active, *next_active;

    MPIU_DBG_MSG_P(BSEND,TYPICAL,"Checking active starting at %p", active);
    while (active) {
	MPI_Request r = active->request->handle;
	int         flag;
	
	next_active = active->next;

	if (active->kind == IBSEND) {
	    /* We handle ibsend specially to allow for the user
	       to attempt and cancel the request. Also, to allow
	       for a cancel attempt (which must be attempted before
	       a successful test or wait), we only start
	       testing when the user has successfully released
	       the request (it is a grequest, the free call will do it) */
	    flag = 0;
            /* XXX DJG FIXME-MT should we be checking this? */
	    if (MPIU_Object_get_ref(active->request) == 1) {
		mpi_errno = MPIR_Test_impl(&r, &flag, MPI_STATUS_IGNORE );
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
	    } else {
		/* We need to invoke the progress engine in case we 
		 need to advance other, incomplete communication.  */
		MPID_Progress_state progress_state;
		MPID_Progress_start(&progress_state);
		mpi_errno = MPID_Progress_test( );
		MPID_Progress_end(&progress_state);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
	    }
	} else {
	    mpi_errno = MPIR_Test_impl( &r, &flag, MPI_STATUS_IGNORE );
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
	}
	if (flag) {
	    /* We're done.  Remove this segment */
	    MPIU_DBG_MSG_P(BSEND,TYPICAL,"Removing segment %p", active);
	    MPIR_Bsend_free_segment( active );
	}
	active = next_active;
	MPIU_DBG_MSG_P(BSEND,TYPICAL,"Next active is %p",active);
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #2
0
static inline int connection_pop_sendq_req(MPIDI_CH3I_Connection_t * conn)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vcch = &conn->vc->ch;
    MPIDI_STATE_DECL(MPID_STATE_CONNECTION_POP_SENDQ_REQ);


    MPIDI_FUNC_ENTER(MPID_STATE_CONNECTION_POP_SENDQ_REQ);
    /* post send of next request on the send queue */

    /* FIXME: Is dequeue/get next the operation we really want? */
    MPIDI_CH3I_SendQ_dequeue(vcch);
    conn->send_active = MPIDI_CH3I_SendQ_head(vcch); /* MT */
    if (conn->send_active != NULL)
    {
	MPIU_DBG_MSG_P(CH3_CONNECT,TYPICAL,"conn=%p: Posting message from connection send queue", conn );
	mpi_errno = MPIDU_Sock_post_writev(conn->sock, conn->send_active->dev.iov, conn->send_active->dev.iov_count, NULL);
	if (mpi_errno != MPI_SUCCESS) {
	    MPIU_ERR_POP(mpi_errno);
	}
    }
    
 fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_CONNECTION_POP_SENDQ_REQ);
    return mpi_errno;
}
Beispiel #3
0
static void big_get(void *buf, ptl_size_t left_to_get, MPIDI_VC_t *vc, ptl_match_bits_t match_bits, MPID_Request *rreq)
{
    int ret;
    MPID_nem_ptl_vc_area *vc_ptl;
    ptl_size_t start, get_sz;

    vc_ptl = VC_PTL(vc);
    start = (ptl_size_t)buf;

    /* we need to handle all events */
    REQ_PTL(rreq)->event_handler = handler_recv_big_get;

    while (left_to_get > 0) {
        /* get up to the maximum allowed by the portals interface */
        if (left_to_get > MPIDI_nem_ptl_ni_limits.max_msg_size)
            get_sz = MPIDI_nem_ptl_ni_limits.max_msg_size;
        else
            get_sz = left_to_get;

        ret = MPID_nem_ptl_rptl_get(MPIDI_nem_ptl_global_md, start, get_sz, vc_ptl->id, vc_ptl->ptg, match_bits, 0, rreq);
        DBG_MSG_GET("global", get_sz, vc->pg_rank, match_bits);
        MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "   buf=%p", (char *)start);
        MPIU_Assert(ret == 0);

        /* account for what has been sent */
        start += get_sz;
        left_to_get -= get_sz;
        REQ_PTL(rreq)->num_gets++;
    }
}
Beispiel #4
0
int MPIDI_CH3_PktHandler_EagerSyncAck( MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt,
				       MPIDI_msg_sz_t *buflen, MPID_Request **rreqp )
{
    MPIDI_CH3_Pkt_eager_sync_ack_t * esa_pkt = &pkt->eager_sync_ack;
    MPID_Request * sreq;
    int mpi_errno = MPI_SUCCESS;
    
    MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
	   "received eager sync ack pkt, sreq=0x%08x", esa_pkt->sender_req_id);
	    
    MPID_Request_get_ptr(esa_pkt->sender_req_id, sreq);
    /* decrement CC (but don't mark data transfer as complete since the 
       transfer could still be in progress) */

    /* FIXME: This sometimes segfaults */
    mpi_errno = MPID_Request_complete(sreq);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }
    
    *buflen = sizeof(MPIDI_CH3_Pkt_t);
    *rreqp = NULL;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #5
0
void MPIR_CommL_forget( MPID_Comm *comm_ptr )
{
#if defined(FINEGRAIN_MPI) /* FG: TODO Temporary bypass */
    return;
#endif

    MPID_Comm *p, *prev;

    MPIU_DBG_MSG_P(COMM,VERBOSE,
		   "Forgetting communicator %p from remember list",comm_ptr);
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
    p = MPIR_All_communicators.head;
    prev = 0;
    while (p) {
	if (p == comm_ptr) {
	    if (prev) prev->comm_next = p->comm_next;
	    else MPIR_All_communicators.head = p->comm_next;
	    break;
	}
	if (p == p->comm_next) {
	    MPL_internal_error_printf( "Mangled pointers to communicators - next is itself for %p\n", p );
	    break;
	}
	prev = p;
	p = p->comm_next;
    }
    /* Record a change to the list */
    MPIR_All_communicators.sequence_number++;
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
}
Beispiel #6
0
void MPIDI_CH3_Request_destroy(MPID_Request * req)
{
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQUEST_DESTROY);
    
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQUEST_DESTROY);
    MPIU_DBG_MSG_P(CH3_CHANNEL,VERBOSE,
		   "freeing request, handle=0x%08x", req->handle);
    
#ifdef MPICH_DBG_OUTPUT
    /*MPIU_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPID_REQUEST);*/
    if (HANDLE_GET_MPI_KIND(req->handle) != MPID_REQUEST)
    {
	int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
                      FCNAME, __LINE__, MPI_ERR_OTHER, 
                      "**invalid_handle", "**invalid_handle %d", req->handle);
	MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
    }
    /* XXX DJG FIXME should we be checking this? */
    /*MPIU_Assert(req->ref_count == 0);*/
    if (req->ref_count != 0)
    {
	int mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                       FCNAME, __LINE__, MPI_ERR_OTHER, 
              "**invalid_refcount", "**invalid_refcount %d", req->ref_count);
	MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
    }
#endif

    /* FIXME: We need a better way to handle these so that we
       do not always need to initialize these fields and check them
       when we destroy a request */
    /* FIXME: We need a way to call these routines ONLY when the 
       related ref count has become zero. */
    if (req->comm != NULL) {
	MPIR_Comm_release(req->comm, 0);
    }

    if (req->greq_fns != NULL) {
        MPIU_Free(req->greq_fns);
    }

    if (req->dev.datatype_ptr != NULL) {
	MPID_Datatype_release(req->dev.datatype_ptr);
    }

    if (req->dev.segment_ptr != NULL) {
	MPID_Segment_free(req->dev.segment_ptr);
    }

    if (MPIDI_Request_get_srbuf_flag(req)) {
	MPIDI_CH3U_SRBuf_free(req);
    }

    MPIU_Handle_obj_free(&MPID_Request_mem, req);
    
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQUEST_DESTROY);
}
Beispiel #7
0
int MPID_Cancel_recv(MPID_Request * rreq)
{
    MPIDI_STATE_DECL(MPID_STATE_MPID_CANCEL_RECV);
    
    MPIDI_FUNC_ENTER(MPID_STATE_MPID_CANCEL_RECV);
    
    MPIU_Assert(rreq->kind == MPID_REQUEST_RECV);

#if defined (_OSU_PSM_)
    rreq->psm_flags |= PSM_RECV_CANCEL;
    if(psm_do_cancel(rreq) == MPI_SUCCESS) {
        *(rreq->cc_ptr) = 0;
        // MPID_Request_release(rreq);
    }
    goto fn_exit;
#endif

#if defined(_OSU_MVAPICH_)
    /* OSU-MPI2 requires extra step to finish rndv request */ 
    MPIDI_CH3I_MRAILI_RREQ_RNDV_FINISH(rreq);
#endif /* defined(_OSU_MVAPICH_) */

    if (MPIDI_CH3U_Recvq_DP(rreq))
    {
	MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
		       "request 0x%08x cancelled", rreq->handle);
	rreq->status.cancelled = TRUE;
	rreq->status.count = 0;
	MPID_REQUEST_SET_COMPLETED(rreq);
	MPID_Request_release(rreq);
    }
    else
    {
	MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
	    "request 0x%08x already matched, unable to cancel", rreq->handle);
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_CANCEL_RECV);
    return MPI_SUCCESS;
}
Beispiel #8
0
void MPIR_CommL_remember( MPID_Comm *comm_ptr )
{
#if defined(FINEGRAIN_MPI) /* FG: TODO Temporary bypass */
    return;
#endif
    MPIU_DBG_MSG_P(COMM,VERBOSE,
		   "Adding communicator %p to remember list",comm_ptr);
    MPIU_DBG_MSG_P(COMM,VERBOSE,
		   "Remember list structure address is %p",&MPIR_All_communicators);
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
    if (comm_ptr == MPIR_All_communicators.head) {
	MPL_internal_error_printf( "Internal error: communicator is already on free list\n" );
	return;
    }
    comm_ptr->comm_next = MPIR_All_communicators.head;
    MPIR_All_communicators.head = comm_ptr;
    MPIR_All_communicators.sequence_number++;
    MPIU_DBG_MSG_P(COMM,VERBOSE,
		   "master head is %p", MPIR_All_communicators.head );

    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
}
Beispiel #9
0
int MPIDI_CH3_VC_Destroy(MPIDI_VC_t *vc )
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_VC_DESTROY);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_VC_DESTROY);

    /* no need to destroy vc to self, this corresponds to the optimization above
     * in MPIDI_CH3_VC_Init */
    if (vc->pg == MPIDI_CH3I_my_pg && vc->pg_rank == MPIDI_CH3I_my_rank) {
        MPIU_DBG_MSG_P(NEM_SOCK_DET, VERBOSE, "skipping self vc=%p", vc);
        goto fn_exit;
    }

    mpi_errno = MPID_nem_vc_destroy(vc);

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_VC_DESTROY);
    return mpi_errno;
}
Beispiel #10
0
int MPID_nem_mx_cancel_recv(MPIDI_VC_t *vc, MPID_Request *rreq)
{
    mx_request_t *mx_request = NULL;
    mx_return_t   ret;
    uint32_t      result;
    int           mpi_errno = MPI_SUCCESS;
    int           handled = FALSE;
   
    mx_request = &(REQ_FIELD(rreq,mx_request));
    /* FIXME this test is probably not correct with multiple netmods        */
    /* We need to know to which netmod a recv request actually "belongs" to */
    if(mx_request != NULL)
    {
       ret = mx_cancel(MPID_nem_mx_local_endpoint,mx_request,&result);
       MPIU_ERR_CHKANDJUMP1(ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_cancel", "**mx_cancel %s", mx_strerror(ret));
       
       if (result)
       {	    
	  int found;
	  rreq->status.cancelled = TRUE;
	  found = MPIDI_CH3U_Recvq_DP(rreq);
	  MPIU_Assert(found);
	  rreq->status.count = 0;
	  MPID_REQUEST_SET_COMPLETED(rreq);
	  MPID_Request_release(rreq);       
       }
       else
       {
	  rreq->status.cancelled = FALSE;
	  MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
			 "request 0x%08x already matched, unable to cancel", rreq->handle);
       }
       handled = TRUE;
     }
   
 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Beispiel #11
0
/**
 * Initialize the HCAs
 * Look at rdma_open_hca() & rdma_iba_hca_init_noqp() in
 * mvapich2/trunk/src/mpid/ch3/channels/mrail/src/gen2/rdma_iba_priv.c
 *
 * Store all the HCA info in mv2_nem_dev_info_t->hca[hca_num]
 *
 * Output:
 *         hca_list: fill it with the HCAs information
 *
 * \see hca_list
 */
int MPID_nem_ib_init_hca()
{
    int mpi_errno = MPI_SUCCESS;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_INIT_HCA);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_INIT_HCA);


    struct ibv_device *ib_dev    = NULL;
    struct ibv_device **dev_list = NULL;
    int nHca;
    int num_devices = 0;

#ifdef CRC_CHECK
    gen_crc_table();
#endif
    memset( hca_list, 0, sizeof(hca_list) );

    /* Get the list of devices */
    dev_list = ibv_get_device_list(&num_devices);
    if (dev_list==NULL) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
	            "**fail %s", "No IB device found");
    }

    if (umad_init() < 0)
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
	            "**fail %s", "Can't init UMAD library");

    /* Runtime checks */
    MPIU_Assert( num_devices<=MAX_NUM_HCAS );
    if ( num_devices> MAX_NUM_HCAS) {
        MPIU_Error_printf( "WARNING: found %d IB devices, the maximum is %d (MAX_NUM_HCAS). ",
        		num_devices, MAX_NUM_HCAS);
        num_devices = MAX_NUM_HCAS;
    }

    if ( ib_hca_num_hcas > num_devices) {
    	MPIU_Error_printf( "WARNING: user requested %d IB devices, the available number is %d. ",
        		ib_hca_num_hcas, num_devices);
        ib_hca_num_hcas = num_devices;
    }

    MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] Found %d HCAs\n", num_devices);
    MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] User requested %d\n", ib_hca_num_hcas);


    /* Retrieve information for each found device */
    for (nHca = 0; nHca < ib_hca_num_hcas; nHca++) {

    	/* Check for user choice */
        if( (rdma_iba_hca[0]==0) || (strncmp(rdma_iba_hca, RDMA_IBA_NULL_HCA, 32)!=0) || (ib_hca_num_hcas > 1)) {
            /* User hasn't specified any HCA name, or the number of HCAs is greater then 1 */
            ib_dev = dev_list[nHca];

        } else {
            /* User specified a HCA, try to look for it */
            int dev_count;

            dev_count = 0;
            while(dev_list[dev_count]) {
                if(!strncmp(ibv_get_device_name(dev_list[dev_count]), rdma_iba_hca, 32)) {
                    ib_dev = dev_list[dev_count];
                    break;
                }
                dev_count++;
            }
        }

        /* Check if device has been identified */
        hca_list[nHca].ib_dev = ib_dev;
        if (!ib_dev) {
	        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
		            "**fail %s", "No IB device found");
        }

        MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] HCA device %d found\n", nHca);



        hca_list[nHca].nic_context = ibv_open_device(ib_dev);
        if (hca_list[nHca].nic_context==NULL) {
	        MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**fail",
		            "%s %d", "Failed to open HCA number", nHca);
        }

        hca_list[nHca].ptag = ibv_alloc_pd(hca_list[nHca].nic_context);
        if (!hca_list[nHca].ptag) {
            MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER,
                    "**fail", "%s%d", "Failed to alloc pd number ", nHca);
        }


        /* Set the hca type */
    #if defined(RDMA_CM)
        if (process_info.use_iwarp_mode) {
    	    if ((mpi_errno = rdma_cm_get_hca_type(process_info.use_iwarp_mode, &process_info.hca_type)) != MPI_SUCCESS)
    	    {
    		    MPIU_ERR_POP(mpi_errno);
    	    }

    	    if (process_info.hca_type == CHELSIO_T3)
    	    {
    		    process_info.use_iwarp_mode = 1;
    	    }
        }
        else
    #endif /* defined(RDMA_CM) */

		mpi_errno = get_hca_type(hca_list[nHca].ib_dev, hca_list[nHca].nic_context, &hca_list[nHca].hca_type);
        if (mpi_errno != MPI_SUCCESS)
        {
        	fprintf(stderr, "[%s, %d] Error in get_hca_type", __FILE__, __LINE__ );
            MPIU_ERR_POP(mpi_errno);
        }

    }



    if (!strncmp(rdma_iba_hca, RDMA_IBA_NULL_HCA, 32) &&
        (ib_hca_num_hcas==1) && (num_devices > nHca) &&
        (rdma_find_active_port(hca_list[0].nic_context, hca_list[nHca].ib_dev)==-1)) {
        /* Trac #376 - There are multiple rdma capable devices (num_devices) in
         * the system. The user has asked us to use ANY (!strncmp) ONE device
         * (rdma_num_hcas), and the first device does not have an active port. So
         * try to find some other device with an active port.
         */
    	int j;
        for (j = 0; dev_list[j]; j++) {
            ib_dev = dev_list[j];
            if (ib_dev) {
            	hca_list[0].nic_context = ibv_open_device(ib_dev);
                if (!hca_list[0].nic_context) {
                    /* Go to next device */
                    continue;
                }
                if (rdma_find_active_port(hca_list[0].nic_context, ib_dev)!=-1) {
                	hca_list[0].ib_dev = ib_dev;
                	hca_list[0].ptag = ibv_alloc_pd(hca_list[0].nic_context);
                    if (!hca_list[0].ptag) {
                        MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER,
                             "**fail", "%s%d", "Failed to alloc pd number ", nHca);
                    }
                }
            }
        }
    }

fn_exit:
    /* Clean up before exit */
	if (dev_list!=NULL)
	  ibv_free_device_list(dev_list);

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_INIT_HCA);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Beispiel #12
0
MPID_Request * MPID_Request_create(void)
{
    MPID_Request * req;
    MPIDI_STATE_DECL(MPID_STATE_MPID_REQUEST_CREATE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_REQUEST_CREATE);
    
    req = MPIU_Handle_obj_alloc(&MPID_Request_mem);
    if (req != NULL)
    {
	MPIU_DBG_MSG_P(CH3_CHANNEL,VERBOSE,
		       "allocated request, handle=0x%08x", req->handle);
#ifdef MPICH_DBG_OUTPUT
	/*MPIU_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPID_REQUEST);*/
	if (HANDLE_GET_MPI_KIND(req->handle) != MPID_REQUEST)
	{
	    int mpi_errno;
	    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, 
		       FCNAME, __LINE__, MPI_ERR_OTHER, 
		       "**invalid_handle", "**invalid_handle %d", req->handle);
	    MPID_Abort(MPIR_Process.comm_world, mpi_errno, -1, NULL);
	}
#endif
	/* FIXME: This makes request creation expensive.  We need to trim
	   this to the basics, with additional setup for special-purpose 
	   requests (think base class and inheritance).  For example, do we 
	   *really* want to set the kind to UNDEFINED? And should the RMA 
	   values be set only for RMA requests? */
	MPIU_Object_set_ref(req, 1);
	req->kind		   = MPID_REQUEST_UNDEFINED;
        MPID_cc_set(&req->cc, 1);
	req->cc_ptr		   = &req->cc;
	/* FIXME: status fields meaningful only for receive, and even then
	   should not need to be set. */
	req->status.MPI_SOURCE	   = MPI_UNDEFINED;
	req->status.MPI_TAG	   = MPI_UNDEFINED;
	req->status.MPI_ERROR	   = MPI_SUCCESS;
        MPIR_STATUS_SET_COUNT(req->status, 0);
        MPIR_STATUS_SET_CANCEL_BIT(req->status, FALSE);
	req->comm		   = NULL;
        req->greq_fns              = NULL;
        req->errflag               = MPIR_ERR_NONE;
	req->dev.datatype_ptr	   = NULL;
	req->dev.segment_ptr	   = NULL;
	/* Masks and flags for channel device state in an MPID_Request */
	req->dev.state		   = 0;
	req->dev.cancel_pending	   = FALSE;
	/* FIXME: RMA ops shouldn't need to be set except when creating a
	   request for RMA operations */
	req->dev.target_win_handle = MPI_WIN_NULL;
	req->dev.source_win_handle = MPI_WIN_NULL;
        req->dev.lock_queue_entry  = NULL;
	req->dev.dtype_info	   = NULL;
	req->dev.dataloop	   = NULL;
	req->dev.iov_offset        = 0;
        req->dev.flags             = MPIDI_CH3_PKT_FLAG_NONE;
        req->dev.resp_request_handle = MPI_REQUEST_NULL;
        req->dev.user_buf          = NULL;
        req->dev.OnDataAvail       = NULL;
        req->dev.OnFinal           = NULL;
        req->dev.user_buf          = NULL;
        req->dev.drop_data         = FALSE;
        req->dev.stream_offset     = 0;
#ifdef MPIDI_CH3_REQUEST_INIT
	MPIDI_CH3_REQUEST_INIT(req);
#endif
    }
    else
    {
	/* FIXME: This fails to fail if debugging is turned off */
	MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"unable to allocate a request");
    }
    
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_REQUEST_CREATE);
    return req;
}
Beispiel #13
0
int MPIR_Comm_commit(MPID_Comm * comm)
{
    int mpi_errno = MPI_SUCCESS;
    int num_local = -1, num_external = -1;
    int local_rank = -1, external_rank = -1;
    int *local_procs = NULL, *external_procs = NULL;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_COMM_COMMIT);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_COMM_COMMIT);

    /* It's OK to relax these assertions, but we should do so very
     * intentionally.  For now this function is the only place that we create
     * our hierarchy of communicators */
    MPIU_Assert(comm->node_comm == NULL);
    MPIU_Assert(comm->node_roots_comm == NULL);

    mpi_errno = set_collops(comm);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /* Notify device of communicator creation */
    mpi_errno = MPID_Dev_comm_create_hook(comm);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    MPIR_Comm_map_free(comm);

    if (comm->comm_kind == MPID_INTRACOMM) {

        mpi_errno = MPIU_Find_local_and_external(comm,
                                                 &num_local, &local_rank, &local_procs,
                                                 &num_external, &external_rank, &external_procs,
                                                 &comm->intranode_table, &comm->internode_table);
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno) {
            if (MPIR_Err_is_fatal(mpi_errno))
                MPIR_ERR_POP(mpi_errno);

            /* Non-fatal errors simply mean that this communicator will not have
             * any node awareness.  Node-aware collectives are an optimization. */
            MPIU_DBG_MSG_P(COMM, VERBOSE, "MPIU_Find_local_and_external failed for comm_ptr=%p",
                           comm);
            if (comm->intranode_table)
                MPIU_Free(comm->intranode_table);
            if (comm->internode_table)
                MPIU_Free(comm->internode_table);

            mpi_errno = MPI_SUCCESS;
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */

        /* defensive checks */
        MPIU_Assert(num_local > 0);
        MPIU_Assert(num_local > 1 || external_rank >= 0);
        MPIU_Assert(external_rank < 0 || external_procs != NULL);

        /* if the node_roots_comm and comm would be the same size, then creating
         * the second communicator is useless and wasteful. */
        if (num_external == comm->remote_size) {
            MPIU_Assert(num_local == 1);
            goto fn_exit;
        }

        /* we don't need a local comm if this process is the only one on this node */
        if (num_local > 1) {
            mpi_errno = MPIR_Comm_create(&comm->node_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);

            comm->node_comm->context_id = comm->context_id + MPID_CONTEXT_INTRANODE_OFFSET;
            comm->node_comm->recvcontext_id = comm->node_comm->context_id;
            comm->node_comm->rank = local_rank;
            comm->node_comm->comm_kind = MPID_INTRACOMM;
            comm->node_comm->hierarchy_kind = MPID_HIERARCHY_NODE;
            comm->node_comm->local_comm = NULL;
            MPIU_DBG_MSG_D(CH3_OTHER, VERBOSE, "Create node_comm=%p\n", comm->node_comm);

            comm->node_comm->local_size = num_local;
            comm->node_comm->remote_size = num_local;

            MPIR_Comm_map_irregular(comm->node_comm, comm, local_procs,
                                    num_local, MPIR_COMM_MAP_DIR_L2L, NULL);

            mpi_errno = set_collops(comm->node_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);

            /* Notify device of communicator creation */
            mpi_errno = MPID_Dev_comm_create_hook(comm->node_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            /* don't call MPIR_Comm_commit here */

            MPIR_Comm_map_free(comm->node_comm);
        }


        /* this process may not be a member of the node_roots_comm */
        if (local_rank == 0) {
            mpi_errno = MPIR_Comm_create(&comm->node_roots_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);

            comm->node_roots_comm->context_id = comm->context_id + MPID_CONTEXT_INTERNODE_OFFSET;
            comm->node_roots_comm->recvcontext_id = comm->node_roots_comm->context_id;
            comm->node_roots_comm->rank = external_rank;
            comm->node_roots_comm->comm_kind = MPID_INTRACOMM;
            comm->node_roots_comm->hierarchy_kind = MPID_HIERARCHY_NODE_ROOTS;
            comm->node_roots_comm->local_comm = NULL;

            comm->node_roots_comm->local_size = num_external;
            comm->node_roots_comm->remote_size = num_external;

            MPIR_Comm_map_irregular(comm->node_roots_comm, comm,
                                    external_procs, num_external, MPIR_COMM_MAP_DIR_L2L, NULL);

            mpi_errno = set_collops(comm->node_roots_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);

            /* Notify device of communicator creation */
            mpi_errno = MPID_Dev_comm_create_hook(comm->node_roots_comm);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            /* don't call MPIR_Comm_commit here */

            MPIR_Comm_map_free(comm->node_roots_comm);
        }

        comm->hierarchy_kind = MPID_HIERARCHY_PARENT;
    }

  fn_exit:
    if (external_procs != NULL)
        MPIU_Free(external_procs);
    if (local_procs != NULL)
        MPIU_Free(local_procs);

    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_COMM_COMMIT);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Beispiel #14
0
/*@
  MPID_Type_vector - create a vector datatype

Input Parameters:
+ count - number of blocks in vector
. blocklength - number of elements in each block
. stride - distance from beginning of one block to the next (see next
  parameter for units)
. strideinbytes - if nonzero, then stride is in bytes, otherwise stride
  is in terms of extent of oldtype
- oldtype - type (using handle) of datatype on which vector is based

Output Parameters:
. newtype - handle of new vector datatype

  Return Value:
  0 on success, MPI error code on failure.
@*/
int MPID_Type_vector(int count,
		     int blocklength,
		     MPI_Aint stride,
		     int strideinbytes,
		     MPI_Datatype oldtype,
		     MPI_Datatype *newtype)
{
    int mpi_errno = MPI_SUCCESS;
    int is_builtin, old_is_contig;
    MPI_Aint el_sz, old_sz;
    MPI_Datatype el_type;
    MPI_Aint old_lb, old_ub, old_extent, old_true_lb, old_true_ub, eff_stride;

    MPID_Datatype *new_dtp;

    if (count == 0) return MPID_Type_zerolen(newtype);

    /* allocate new datatype object and handle */
    new_dtp = (MPID_Datatype *) MPIU_Handle_obj_alloc(&MPID_Datatype_mem);
    if (!new_dtp) {
	/* --BEGIN ERROR HANDLING-- */
	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
					 "MPID_Type_vector", __LINE__,
					 MPI_ERR_OTHER, "**nomem", 0);
	return mpi_errno;
	/* --END ERROR HANDLING-- */
    }

    /* handle is filled in by MPIU_Handle_obj_alloc() */
    MPIU_Object_set_ref(new_dtp, 1);
    new_dtp->is_permanent = 0;
    new_dtp->is_committed = 0;
    new_dtp->attributes   = NULL;
    new_dtp->cache_id     = 0;
    new_dtp->name[0]      = 0;
    new_dtp->contents     = NULL;

    new_dtp->dataloop       = NULL;
    new_dtp->dataloop_size  = -1;
    new_dtp->dataloop_depth = -1;
    new_dtp->hetero_dloop       = NULL;
    new_dtp->hetero_dloop_size  = -1;
    new_dtp->hetero_dloop_depth = -1;

    is_builtin = (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN);

    if (is_builtin) {
	el_sz   = (MPI_Aint) MPID_Datatype_get_basic_size(oldtype);
	el_type = oldtype;

	old_lb        = 0;
	old_true_lb   = 0;
	old_ub        = el_sz;
	old_true_ub   = el_sz;
	old_sz        = el_sz;
	old_extent    = el_sz;
	old_is_contig = 1;

	new_dtp->size           = (MPI_Aint) count *
	                          (MPI_Aint) blocklength * el_sz;
	new_dtp->has_sticky_lb  = 0;
	new_dtp->has_sticky_ub  = 0;

	new_dtp->alignsize    = el_sz; /* ??? */
	new_dtp->n_builtin_elements   = count * blocklength;
	new_dtp->builtin_element_size = el_sz;
	new_dtp->basic_type       = el_type;

	new_dtp->max_contig_blocks = count;

	eff_stride = (strideinbytes) ? stride : (stride * el_sz);
    }
    else /* user-defined base type (oldtype) */ {
	MPID_Datatype *old_dtp;

	MPID_Datatype_get_ptr(oldtype, old_dtp);
	el_sz   = old_dtp->builtin_element_size;
	el_type = old_dtp->basic_type;

	old_lb        = old_dtp->lb;
	old_true_lb   = old_dtp->true_lb;
	old_ub        = old_dtp->ub;
	old_true_ub   = old_dtp->true_ub;
	old_sz        = old_dtp->size;
	old_extent    = old_dtp->extent;
	old_is_contig = old_dtp->is_contig;

	new_dtp->size           = count * blocklength * old_dtp->size;
	new_dtp->has_sticky_lb  = old_dtp->has_sticky_lb;
	new_dtp->has_sticky_ub  = old_dtp->has_sticky_ub;

	new_dtp->alignsize    = old_dtp->alignsize;
	new_dtp->n_builtin_elements   = count * blocklength * old_dtp->n_builtin_elements;
	new_dtp->builtin_element_size = el_sz;
	new_dtp->basic_type       = el_type;

	new_dtp->max_contig_blocks = old_dtp->max_contig_blocks * count * blocklength;

	eff_stride = (strideinbytes) ? stride : (stride * old_dtp->extent);
    }

    MPID_DATATYPE_VECTOR_LB_UB((MPI_Aint) count,
			       eff_stride,
			       (MPI_Aint) blocklength,
			       old_lb,
			       old_ub,
			       old_extent,
			       new_dtp->lb,
			       new_dtp->ub);
    new_dtp->true_lb = new_dtp->lb + (old_true_lb - old_lb);
    new_dtp->true_ub = new_dtp->ub + (old_true_ub - old_ub);
    new_dtp->extent  = new_dtp->ub - new_dtp->lb;

    /* new type is only contig for N types if old one was, and
     * size and extent of new type are equivalent, and stride is
     * equal to blocklength * size of old type.
     */
    if ((MPI_Aint)(new_dtp->size) == new_dtp->extent &&
	eff_stride == (MPI_Aint) blocklength * old_sz &&
	old_is_contig)
    {
	new_dtp->is_contig = 1;
        new_dtp->max_contig_blocks = 1;
    }
    else {
	new_dtp->is_contig = 0;
    }

    *newtype = new_dtp->handle;

    MPIU_DBG_MSG_P(DATATYPE,VERBOSE,"vector type %x created.",
		   new_dtp->handle);

    return mpi_errno;
}
Beispiel #15
0
int MPID_Isend(const void * buf, int count, MPI_Datatype datatype, int rank, 
	       int tag, MPID_Comm * comm, int context_offset,
               MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq;
    MPIDI_VC_t * vc=0;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_ISEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ISEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                  "rank=%d, tag=%d, context=%d", 
                  rank, tag, comm->context_id + context_offset));
    
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
#if defined (_OSU_PSM_)
        goto skip_self_send; /* psm will internally do self-send, no special
                                handling is needed here */
#endif /* _OSU_PSM_ */          
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, 
			    context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq);
	goto fn_exit;
    }
#if defined (_OSU_PSM_)
skip_self_send:
#endif

    if (rank != MPI_PROC_NULL) {
        MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
#ifdef ENABLE_COMM_OVERRIDES
        /* this needs to come before the sreq is created, since the override
         * function is responsible for creating its own request */
        if (vc->comm_ops && vc->comm_ops->isend)
        {
            mpi_errno = vc->comm_ops->isend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
            goto fn_exit;
        }
#endif
    }

    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);

    if (rank == MPI_PROC_NULL)
    {
	MPIU_Object_set_ref(sreq, 1);
        MPID_cc_set(&sreq->cc, 0);
	goto fn_exit;
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, 
			    dt_true_lb);
    
    if (data_sz == 0)
    {
#if defined (_OSU_PSM_)
        goto eager_send;
#endif /* _OSU_PSM_ */
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

	MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
	sreq->dev.OnDataAvail = 0;
	    
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
	MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
	eager_pkt->match.parts.rank = comm->rank;
	eager_pkt->match.parts.tag = tag;
	eager_pkt->match.parts.context_id = comm->context_id + context_offset;
	eager_pkt->sender_req_id = sreq->handle;
	eager_pkt->data_sz = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);
	MPIDI_Request_set_seqnum(sreq, seqnum);
	
	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = MPIU_CALL(MPIDI_CH3,iSend(vc, sreq, eager_pkt, 
					      sizeof(*eager_pkt)));
	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIU_Object_set_ref(sreq, 0);
	    MPIDI_CH3_Request_destroy(sreq);
	    sreq = NULL;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
	    goto fn_exit;
	}
	/* --END ERROR HANDLING-- */

	goto fn_exit;
    }

#if defined (_OSU_PSM_)
    if(HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
        sreq->dev.datatype_ptr = dt_ptr;
        MPID_Datatype_add_ref(dt_ptr);
        sreq->psm_flags |= PSM_NEED_DTYPE_RELEASE;
    }
    if(vc->force_eager)
        goto eager_send;
#endif /* _OSU_PSM_ */

#if defined(_OSU_MVAPICH_)
    int i;
    for (i = 0 ; i < rdma_num_extra_polls; i++)
    {
        if (rdma_global_ext_sendq_size > 1)
            MPID_Progress_test();
    }
#endif
    /* FIXME: flow control: limit number of outstanding eager messsages 
       containing data and need to be buffered by the receiver */
#if defined(_OSU_MVAPICH_)
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <=	vc->eager_max_msg_sz
        && !vc->force_rndv)
#else /* defined(_OSU_MVAPICH_) */
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <=	vc->eager_max_msg_sz)
#endif /* defined(_OSU_MVAPICH_) */
    {
#if defined (_OSU_PSM_)
eager_send:
#endif /* _OSU_PSM */
        if (dt_contig) 
        {
            mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq, 
                                MPIDI_CH3_PKT_EAGER_SEND,
                                (char*)buf + dt_true_lb, 
                                data_sz, rank, tag, 
                                comm, context_offset );
        } 
        else 
        {
#if defined (_OSU_PSM_)
            sreq->psm_flags |= PSM_NON_BLOCKING_SEND;
#endif
            mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, 
                                                          MPIDI_CH3_PKT_EAGER_SEND,
                                                          buf, count, datatype,
                                                          data_sz, rank, tag, 
                                                          comm, context_offset );
#if defined (_OSU_PSM_)
            goto fn_exit;
#endif            
            /* If we're not complete, then add a reference to the datatype */
            if (sreq && sreq->dev.OnDataAvail) {
                sreq->dev.datatype_ptr = dt_ptr;
                MPID_Datatype_add_ref(dt_ptr);
            }
        }
    }
    else
    {
	/* Note that the sreq was created above */
	MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG );
	mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm, 
                                     context_offset );
	/* FIXME: fill temporary IOV or pack temporary buffer after send to 
	   hide some latency.  This requires synchronization
           because the CTS packet could arrive and be processed before the 
	   above iStartmsg completes (depending on the progress
           engine, threads, etc.). */
#if defined(_OSU_MVAPICH_)
        /* rndv transfers need to process CTS packet to initiate the actual RDMA transfer */
        MPID_Progress_test();
#endif /* defined(_OSU_MVAPICH_) */
	
	if (sreq && dt_ptr != NULL)
	{
	    sreq->dev.datatype_ptr = dt_ptr;
	    MPID_Datatype_add_ref(dt_ptr);
	}
    }

  fn_exit:
    *request = sreq;

#if defined(_OSU_MVAPICH_)
    for (i = 0 ; i < rdma_num_extra_polls; i++)
    {
        if (rdma_global_ext_sendq_size > 1)
            MPID_Progress_test();
    }
#endif

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,
    {
	if (sreq != NULL)
	{
	    MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle);
	}
    }
		  );
Beispiel #16
0
int MPID_Irsend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset,
		MPID_Request ** request)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_ready_send_t * const ready_pkt = &upkt.ready_send;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int mpi_errno = MPI_SUCCESS;    
    MPIDI_STATE_DECL(MPID_STATE_MPID_IRSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_IRSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                "rank=%d, tag=%d, context=%d", 
                rank, tag, comm->context_id + context_offset));

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked &&
            MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) &&
            MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }
    
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_RSEND, &sreq);
	goto fn_exit;
    }

    if (rank != MPI_PROC_NULL) {
        MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
#ifdef ENABLE_COMM_OVERRIDES
        /* this needs to come before the sreq is created, since the override
         * function is responsible for creating its own request */
        if (vc->comm_ops && vc->comm_ops->irsend)
        {
            mpi_errno = vc->comm_ops->irsend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
            goto fn_exit;
        }
#endif
    }
    
    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_RSEND);
    MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
    
    if (rank == MPI_PROC_NULL)
    {
	MPIU_Object_set_ref(sreq, 1);
        MPID_cc_set(&sreq->cc, 0);
	goto fn_exit;
    }
    
    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    MPIDI_Pkt_init(ready_pkt, MPIDI_CH3_PKT_READY_SEND);
    ready_pkt->match.parts.rank = comm->rank;
    ready_pkt->match.parts.tag = tag;
    ready_pkt->match.parts.context_id = comm->context_id + context_offset;
    ready_pkt->sender_req_id = MPI_REQUEST_NULL;
    ready_pkt->data_sz = data_sz;

    if (data_sz == 0)
    {
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");

	sreq->dev.OnDataAvail = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(ready_pkt, seqnum);
	MPIDI_Request_set_seqnum(sreq, seqnum);
	
	MPID_THREAD_CS_ENTER(POBJ, vc->pobj_mutex);
	mpi_errno = MPIDI_CH3_iSend(vc, sreq, ready_pkt, sizeof(*ready_pkt));
	MPID_THREAD_CS_EXIT(POBJ, vc->pobj_mutex);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
            MPID_Request_release(sreq);
	    sreq = NULL;
            MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
	    goto fn_exit;
	}
	/* --END ERROR HANDLING-- */
	goto fn_exit;
    }
    
    if (vc->ready_eager_max_msg_sz < 0 || data_sz + sizeof(MPIDI_CH3_Pkt_ready_send_t) <= vc->ready_eager_max_msg_sz) {
        if (dt_contig) {
            mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq,
                                                    MPIDI_CH3_PKT_READY_SEND,
                                                    (char*)buf + dt_true_lb,
                                                    data_sz, rank, tag,
                                                    comm, context_offset );
            
        }
        else {
            mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq,
                                                      MPIDI_CH3_PKT_READY_SEND,
                                                      buf, count, datatype,
                                                      data_sz, rank, tag,
                                                      comm, context_offset );
            /* If we're not complete, then add a reference to the datatype */
            if (sreq && sreq->dev.OnDataAvail) {
                sreq->dev.datatype_ptr = dt_ptr;
                MPID_Datatype_add_ref(dt_ptr);
            }
        }
    } else {
 	/* Do rendezvous.  This will be sent as a regular send not as
           a ready send, so the receiver won't know to send an error
           if the receive has not been posted */
	MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG );
	mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm,
                                     context_offset );
	if (sreq && dt_ptr != NULL) {
	    sreq->dev.datatype_ptr = dt_ptr;
	    MPID_Datatype_add_ref(dt_ptr);
	}
    }

  fn_exit:
    *request = sreq;

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,{
	if (sreq != NULL)
	{
	    MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle);
	}
    }
		  );
Beispiel #17
0
int MPID_Ssend(const void * buf, MPI_Aint count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset,
	       MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int eager_threshold = -1;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_SSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
              "rank=%d, tag=%d, context=%d", 
              rank, tag, comm->context_id + context_offset));

    /* Check to make sure the communicator hasn't already been revoked */
    if (comm->revoked &&
            MPIR_AGREE_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask) &&
            MPIR_SHRINK_TAG != MPIR_TAG_MASK_ERROR_BITS(tag & ~MPIR_Process.tagged_coll_mask)) {
        MPIR_ERR_SETANDJUMP(mpi_errno,MPIX_ERR_REVOKED,"**revoked");
    }

    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, 
				     context_offset, MPIDI_REQUEST_TYPE_SSEND, 
				     &sreq);
	/* In the single threaded case, sending to yourself will cause 
	   deadlock.  Note that in the runtime-thread case, this check
	   will not be made (long-term FIXME) */
#       ifndef MPICH_IS_THREADED
	{
	    /* --BEGIN ERROR HANDLING-- */
	    if (sreq != NULL && MPID_cc_get(sreq->cc) != 0)
	    {
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
						 "**dev|selfsenddeadlock", 0);
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */
	}
#	endif
	goto fn_exit;
    }
    
    if (rank == MPI_PROC_NULL)
    {
	goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->ssend)
    {
	mpi_errno = vc->comm_ops->ssend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
	goto fn_exit;
    }
#endif

    
    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND);

    if (data_sz == 0)
    {
	mpi_errno = MPIDI_CH3_EagerSyncZero( &sreq, rank, tag, comm, 
					     context_offset );
	goto fn_exit;
    }

    MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc);

    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= eager_threshold)
    {
	mpi_errno = MPIDI_CH3_EagerSyncNoncontigSend( &sreq, buf, count,
                                                      datatype, data_sz, 
                                                      dt_contig, dt_true_lb,
                                                      rank, tag, comm, 
                                                      context_offset );
    }
    else
    {
	/* Note that the sreq was created above */
	mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm, 
                                     context_offset );
	/* Note that we don't increase the ref cound on the datatype
	   because this is a blocking call, and the calling routine 
	   must wait until sreq completes */
    }

  fn_fail:
  fn_exit:
    *request = sreq;
    
    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,{if (sreq!=NULL) {
            MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
			   "request allocated, handle=0x%08x", sreq->handle);}});
Beispiel #18
0
static int send_msg(ptl_hdr_data_t ssend_flag, struct MPIDI_VC *vc, const void *buf, MPI_Aint count, MPI_Datatype datatype, int dest,
                    int tag, MPID_Comm *comm, int context_offset, struct MPID_Request **request)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_ptl_vc_area *const vc_ptl = VC_PTL(vc);
    int ret;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype *dt_ptr;
    MPID_Request *sreq = NULL;
    ptl_me_t me;
    int initial_iov_count, remaining_iov_count;
    ptl_md_t md;
    MPI_Aint last;
    MPIU_CHKPMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_SEND_MSG);

    MPIDI_FUNC_ENTER(MPID_STATE_SEND_MSG);

    MPID_nem_ptl_request_create_sreq(sreq, mpi_errno, comm);
    sreq->dev.match.parts.rank = dest;
    sreq->dev.match.parts.tag = tag;
    sreq->dev.match.parts.context_id = comm->context_id + context_offset;
    sreq->ch.vc = vc;

    if (!vc_ptl->id_initialized) {
        mpi_errno = MPID_nem_ptl_init_id(vc);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
    MPIU_DBG_MSG_FMT(CH3_CHANNEL, VERBOSE, (MPIU_DBG_FDEST, "count="MPI_AINT_FMT_DEC_SPEC" datatype=%#x contig=%d data_sz=%lu", count, datatype, dt_contig, data_sz));

    if (data_sz <= PTL_LARGE_THRESHOLD) {
        /* Small message.  Send all data eagerly */
        if (dt_contig) {
            void *start = (char *)buf + dt_true_lb;
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small contig message");
            REQ_PTL(sreq)->event_handler = handler_send;
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "&REQ_PTL(sreq)->event_handler = %p", &(REQ_PTL(sreq)->event_handler));
            if (start == NULL)
                ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)&dummy, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag, data_sz));
            else
                ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)start, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag, data_sz));
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.nid = %#x", vc_ptl->id.phys.nid);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "id.pid = %#x", vc_ptl->id.phys.pid);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "sreq = %p", sreq);
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "vc_ptl->pt = %d", vc_ptl->pt);
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "REQ_PTL(sreq)->event_handler = %p", REQ_PTL(sreq)->event_handler);
            goto fn_exit;
        }

        /* noncontig data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Small noncontig message");
        sreq->dev.segment_ptr = MPID_Segment_alloc();
        MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        sreq->dev.segment_size = data_sz;

        last = sreq->dev.segment_size;
        sreq->dev.iov_count = MPL_IOV_LIMIT;
        MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

        if (last == sreq->dev.segment_size) {
            /* IOV is able to describe entire message */
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    entire message fits in IOV");
            md.start = sreq->dev.iov;
            md.length = sreq->dev.iov_count;
            md.options = PTL_IOVEC;
            md.eq_handle = MPIDI_nem_ptl_origin_eq;
            md.ct_handle = PTL_CT_NONE;
            ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

            REQ_PTL(sreq)->event_handler = handler_send;
            ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, data_sz, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                        NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                        NPTL_HEADER(ssend_flag, data_sz));
            MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
            DBG_MSG_PUT("sreq", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
            goto fn_exit;
        }

        /* IOV is not long enough to describe entire message */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    IOV too long: using bounce buffer");
        MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->chunk_buffer[0], void *, data_sz, mpi_errno, "chunk_buffer");
        MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
        sreq->dev.segment_first = 0;
        last = data_sz;
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, REQ_PTL(sreq)->chunk_buffer[0]);
        MPIU_Assert(last == sreq->dev.segment_size);
        REQ_PTL(sreq)->event_handler = handler_send;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)REQ_PTL(sreq)->chunk_buffer[0], data_sz, PTL_NO_ACK_REQ,
                                    vc_ptl->id, vc_ptl->pt, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                    NPTL_HEADER(ssend_flag, data_sz));
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", data_sz, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag, data_sz));
        goto fn_exit;
    }

    /* Large message.  Send first chunk of data and let receiver get the rest */
    if (dt_contig) {
        /* create ME for buffer so receiver can issue a GET for the data */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large contig message");
        big_meappend((char *)buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc,
                     NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), sreq);

        REQ_PTL(sreq)->event_handler = handler_send;
        ret = MPID_nem_ptl_rptl_put(MPIDI_nem_ptl_global_md, (ptl_size_t)((char *)buf + dt_true_lb), PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                    NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                    NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
        DBG_MSG_PUT("global", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
        goto fn_exit;
    }

    /* Large noncontig data */
    MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "Large noncontig message");
    sreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIR_ERR_CHKANDJUMP1(sreq->dev.segment_ptr == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment_alloc");
    MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
    sreq->dev.segment_first = 0;
    sreq->dev.segment_size = data_sz;

    last = PTL_LARGE_THRESHOLD;
    sreq->dev.iov_count = MPL_IOV_LIMIT;
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.iov, &sreq->dev.iov_count);

    initial_iov_count = sreq->dev.iov_count;
    sreq->dev.segment_first = last;

    if (last == PTL_LARGE_THRESHOLD) {
        /* first chunk of message fits into IOV */
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    first chunk fits in IOV");
        if (initial_iov_count < MPL_IOV_LIMIT) {
            /* There may be space for the rest of the message in this IOV */
            sreq->dev.iov_count = MPL_IOV_LIMIT - sreq->dev.iov_count;
            last = sreq->dev.segment_size;

            MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last,
                                     &sreq->dev.iov[initial_iov_count], &sreq->dev.iov_count);
            remaining_iov_count = sreq->dev.iov_count;

            if (last == sreq->dev.segment_size && last <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) {
                /* Entire message fit in one IOV */
                int was_incomplete;

                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "    rest of message fits in one IOV");
                /* Create ME for remaining data */
                me.start = &sreq->dev.iov[initial_iov_count];
                me.length = remaining_iov_count;
                me.ct_handle = PTL_CT_NONE;
                me.uid = PTL_UID_ANY;
                me.options = ( PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_USE_ONCE | PTL_ME_IS_ACCESSIBLE | PTL_ME_EVENT_LINK_DISABLE |
                               PTL_ME_EVENT_UNLINK_DISABLE | PTL_IOVEC );
                me.match_id = vc_ptl->id;
                me.match_bits = NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank);
                me.ignore_bits = 0;
                me.min_free = 0;

                MPIU_CHKPMEM_MALLOC(REQ_PTL(sreq)->get_me_p, ptl_handle_me_t *, sizeof(ptl_handle_me_t), mpi_errno, "get_me_p");

                ret = PtlMEAppend(MPIDI_nem_ptl_ni, MPIDI_nem_ptl_get_pt, &me, PTL_PRIORITY_LIST, sreq,
                                  &REQ_PTL(sreq)->get_me_p[0]);
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmeappend", "**ptlmeappend %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_MEAPPEND("CTL", vc->pg_rank, me, sreq);
                /* increment the cc for the get operation */
                MPIDI_CH3U_Request_increment_cc(sreq, &was_incomplete);
                MPIU_Assert(was_incomplete);

                /* Create MD for first chunk */
                md.start = sreq->dev.iov;
                md.length = initial_iov_count;
                md.options = PTL_IOVEC;
                md.eq_handle = MPIDI_nem_ptl_origin_eq;
                md.ct_handle = PTL_CT_NONE;
                ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(sreq)->md);
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret));

                REQ_PTL(sreq)->event_handler = handler_send;
                ret = MPID_nem_ptl_rptl_put(REQ_PTL(sreq)->md, 0, PTL_LARGE_THRESHOLD, PTL_NO_ACK_REQ, vc_ptl->id, vc_ptl->pt,
                                            NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), 0, sreq,
                                            NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlput", "**ptlput %s", MPID_nem_ptl_strerror(ret));
                DBG_MSG_PUT("req", PTL_LARGE_THRESHOLD, vc->pg_rank, NPTL_MATCH(tag, comm->context_id + context_offset, comm->rank), NPTL_HEADER(ssend_flag | NPTL_LARGE, data_sz));
                goto fn_exit;
            }
Beispiel #19
0
int MPID_Ssend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset,
	       MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq = NULL;
    MPIDI_VC_t * vc;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_SSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_SSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
              "rank=%d, tag=%d, context=%d", 
              rank, tag, comm->context_id + context_offset));

    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
#if defined(_OSU_PSM_)
    goto skip_self_send;     /* PSM internally, will optimize self-send */
#endif
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, 
				     context_offset, MPIDI_REQUEST_TYPE_SSEND, 
				     &sreq);
	/* In the single threaded case, sending to yourself will cause 
	   deadlock.  Note that in the runtime-thread case, this check
	   will not be made (long-term FIXME) */
#       ifndef MPICH_IS_THREADED
	{
	    /* --BEGIN ERROR HANDLING-- */
	    if (sreq != NULL && sreq->cc != 0)
	    {
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
						 "**dev|selfsenddeadlock", 0);
		goto fn_exit;
	    }
	    /* --END ERROR HANDLING-- */
	}
#	endif
	goto fn_exit;
    }
#if defined (_OSU_PSM_)
skip_self_send:
#endif        
    if (rank == MPI_PROC_NULL)
    {
	goto fn_exit;
    }

    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->ssend)
    {
	mpi_errno = vc->comm_ops->ssend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
	goto fn_exit;
    }
#endif

    
    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND);

    if (data_sz == 0)
    {
#if defined (_OSU_PSM_)
    goto psm_ssend;
#endif
	mpi_errno = MPIDI_CH3_EagerSyncZero( &sreq, rank, tag, comm, 
					     context_offset );
	goto fn_exit;
    }
#if defined (_OSU_PSM_)
psm_ssend:
    sreq->psm_flags |= PSM_SYNC_SEND;
    if(dt_contig) {
        PSMSG(fprintf(stderr, "psm Sync send\n"));
        mpi_errno = MPIDI_CH3_EagerContigSend(&sreq, MPIDI_CH3_PKT_EAGER_SEND,
                       (char *)buf + dt_true_lb, data_sz, rank, tag, comm,
                        context_offset);
    } else {
        PSMSG(fprintf(stderr, "psm NC-Sync send\n"));
        mpi_errno = MPIDI_CH3_EagerNoncontigSend(&sreq,
                        MPIDI_CH3_PKT_EAGER_SEND, buf, count, datatype, data_sz,
                        rank, tag, comm, context_offset);
    }
    goto fn_exit;
#endif /* _OSU_PSM_ */
    
#if defined(_OSU_MVAPICH_)
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= vc->eager_max_msg_sz
        && ! vc->force_rndv)
#else /* defined(_OSU_MVAPICH_) */
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= vc->eager_max_msg_sz)
#endif /* defined(_OSU_MVAPICH_) */
    {
	mpi_errno = MPIDI_CH3_EagerSyncNoncontigSend( &sreq, buf, count,
                                                      datatype, data_sz, 
                                                      dt_contig, dt_true_lb,
                                                      rank, tag, comm, 
                                                      context_offset );
    }
    else
    {
	/* Note that the sreq was created above */
	mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm, 
                                     context_offset );
	/* Note that we don't increase the ref cound on the datatype
	   because this is a blocking call, and the calling routine 
	   must wait until sreq completes */
    }

  fn_exit:
    *request = sreq;
    
    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,{if (sreq!=NULL) {
            MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
			   "request allocated, handle=0x%08x", sreq->handle);}});
Beispiel #20
0
int MPID_nem_scif_send_queued(MPIDI_VC_t * vc, reqq_t * send_queue)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq;
    MPIDI_msg_sz_t offset;
    MPID_IOV *iov;
    int complete;
    uint64_t seqno;
    MPID_nem_scif_vc_area *vc_scif = VC_SCIF(vc);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_SCIF_SEND_QUEUED);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_SCIF_SEND_QUEUED);

    MPIU_Assert(vc != NULL);

    if (MPIDI_CH3I_Sendq_empty(*send_queue)) {
        if (vc_scif->terminate)
            MPID_nem_scif_vc_terminated(vc);
        goto fn_exit;
    }
    while (!MPIDI_CH3I_Sendq_empty(*send_queue)) {
        sreq = MPIDI_CH3I_Sendq_head(*send_queue);
        seqno = RQ_SCIF(sreq)->seqno;
        complete = 0;
        if (seqno) {
            /* We were just waiting for DMA to finish */
            complete = MPID_nem_scif_chk_seqno(&vc_scif->sc->csend, seqno);
        }
        else if (MPID_nem_scif_poll_send(vc_scif->sc->fd, &vc_scif->sc->csend)) {
            MPIU_DBG_MSG_P(CH3_CHANNEL, VERBOSE, "Sending %p", sreq);

            iov = &sreq->dev.iov[sreq->dev.iov_offset];

            offset = MPID_nem_scif_writev(vc_scif->sc->fd, &vc_scif->sc->csend,
                                          iov, sreq->dev.iov_count, &seqno);
            MPIU_ERR_CHKANDJUMP1(offset <= 0, mpi_errno, MPI_ERR_OTHER,
                                 "**scif_writev", "**scif_writev %s", MPIU_Strerror(errno));
            MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "write " MPIDI_MSG_SZ_FMT, offset);
            RQ_SCIF(sreq)->seqno = seqno;
            complete = 1;
            for (iov = &sreq->dev.iov[sreq->dev.iov_offset];
                 iov < &sreq->dev.iov[sreq->dev.iov_offset + sreq->dev.iov_count]; ++iov) {
                if (offset < iov->MPID_IOV_LEN) {
                    iov->MPID_IOV_BUF = (char *) iov->MPID_IOV_BUF + offset;
                    iov->MPID_IOV_LEN -= offset;
                    /* iov_count should be equal to the number of iov's remaining */
                    sreq->dev.iov_count -= ((iov - sreq->dev.iov) - sreq->dev.iov_offset);
                    sreq->dev.iov_offset = iov - sreq->dev.iov;
                    complete = 0;
                    seqno = 0;
                    break;
                }
                offset -= iov->MPID_IOV_LEN;
            }
            if (seqno)
                complete = 0;
        }
        if (!complete) {
            /* writev couldn't write the entire iov, give up for now */
            break;
        }
        else {
            /* sent whole message */
            int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);

            RQ_SCIF(sreq)->seqno = 0;
            reqFn = sreq->dev.OnDataAvail;
            if (!reqFn) {
                MPIU_Assert(MPIDI_Request_get_type(sreq) != MPIDI_REQUEST_TYPE_GET_RESP);
                MPIDI_CH3U_Request_complete(sreq);
                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
                MPIDI_CH3I_Sendq_dequeue(send_queue, &sreq);
                continue;
            }

            complete = 0;
            mpi_errno = reqFn(vc, sreq, &complete);
            if (mpi_errno)
                MPIU_ERR_POP(mpi_errno);

            if (complete) {
                MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
                MPIDI_CH3I_Sendq_dequeue(send_queue, &sreq);
                continue;
            }
            sreq->dev.iov_offset = 0;
        }
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_SCIF_SEND_QUEUED);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Beispiel #21
0
int MPID_Isend(const void * buf, int count, MPI_Datatype datatype, int rank, 
	       int tag, MPID_Comm * comm, int context_offset,
               MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq;
    MPIDI_VC_t * vc=0;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int eager_threshold = -1;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_ISEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ISEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                  "rank=%d, tag=%d, context=%d", 
                  rank, tag, comm->context_id + context_offset));
    
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, 
			    context_offset, MPIDI_REQUEST_TYPE_SEND, &sreq);
	goto fn_exit;
    }

    if (rank != MPI_PROC_NULL) {
        MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
#ifdef ENABLE_COMM_OVERRIDES
        /* this needs to come before the sreq is created, since the override
         * function is responsible for creating its own request */
        if (vc->comm_ops && vc->comm_ops->isend)
        {
            mpi_errno = vc->comm_ops->isend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
            goto fn_exit;
        }
#endif
    }

    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);

    if (rank == MPI_PROC_NULL)
    {
	MPIU_Object_set_ref(sreq, 1);
        MPID_cc_set(&sreq->cc, 0);
	goto fn_exit;
    }

    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, 
			    dt_true_lb);
    
    if (data_sz == 0)
    {
	MPIDI_CH3_Pkt_t upkt;
	MPIDI_CH3_Pkt_eager_send_t * const eager_pkt = &upkt.eager_send;

	MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
	sreq->dev.OnDataAvail = 0;
	    
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");
	MPIDI_Pkt_init(eager_pkt, MPIDI_CH3_PKT_EAGER_SEND);
	eager_pkt->match.parts.rank = comm->rank;
	eager_pkt->match.parts.tag = tag;
	eager_pkt->match.parts.context_id = comm->context_id + context_offset;
	eager_pkt->sender_req_id = sreq->handle;
	eager_pkt->data_sz = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(eager_pkt, seqnum);
	MPIDI_Request_set_seqnum(sreq, seqnum);
	
	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = MPIDI_CH3_iSend(vc, sreq, eager_pkt, sizeof(*eager_pkt));
	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIU_Object_set_ref(sreq, 0);
	    MPIDI_CH3_Request_destroy(sreq);
	    sreq = NULL;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
	    goto fn_exit;
	}
	/* --END ERROR HANDLING-- */

	goto fn_exit;
    }

    MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc);

    /* FIXME: flow control: limit number of outstanding eager messages
       containing data and need to be buffered by the receiver */
    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_send_t) <= eager_threshold)
    {
	if (dt_contig)
	{
	    mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq, 
						    MPIDI_CH3_PKT_EAGER_SEND,
						    (char*)buf + dt_true_lb, 
						    data_sz, rank, tag, 
						    comm, context_offset );
	}
	else
	{
	    mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, 
                                                      MPIDI_CH3_PKT_EAGER_SEND,
                                                      buf, count, datatype,
                                                      data_sz, rank, tag, 
                                                      comm, context_offset );
	    /* If we're not complete, then add a reference to the datatype */
	    if (sreq && sreq->dev.OnDataAvail) {
		sreq->dev.datatype_ptr = dt_ptr;
		MPID_Datatype_add_ref(dt_ptr);
	    }
	}
    }
    else
    {
	/* Note that the sreq was created above */
	MPIDI_Request_set_msg_type( sreq, MPIDI_REQUEST_RNDV_MSG );
	mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm, 
                                     context_offset );
	/* FIXME: fill temporary IOV or pack temporary buffer after send to 
	   hide some latency.  This requires synchronization
           because the CTS packet could arrive and be processed before the 
	   above iStartmsg completes (depending on the progress
           engine, threads, etc.). */
	
	if (sreq && dt_ptr != NULL)
	{
	    sreq->dev.datatype_ptr = dt_ptr;
	    MPID_Datatype_add_ref(dt_ptr);
	}
    }

  fn_exit:
    *request = sreq;

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,
    {
	if (sreq != NULL)
	{
	    MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle);
	}
    }
		  );
int MPID_Type_create_resized(MPI_Datatype oldtype,
			     MPI_Aint lb,
			     MPI_Aint extent,
			     MPI_Datatype *newtype_p)
{
    MPID_Datatype *new_dtp;

    new_dtp = (MPID_Datatype *) MPIU_Handle_obj_alloc(&MPID_Datatype_mem);
    /* --BEGIN ERROR HANDLING-- */
    if (!new_dtp) return MPIDI_Type_create_resized_memory_error();
    /* --END ERROR HANDLING-- */

    /* handle is filled in by MPIU_Handle_obj_alloc() */
    MPIU_Object_set_ref(new_dtp, 1);
    new_dtp->is_permanent = 0;
    new_dtp->is_committed = 0;
    new_dtp->attributes   = 0;
    new_dtp->cache_id     = 0;
    new_dtp->name[0]      = 0;
    new_dtp->contents     = 0;

    new_dtp->dataloop       = NULL;
    new_dtp->dataloop_size  = -1;
    new_dtp->dataloop_depth = -1;
    new_dtp->hetero_dloop       = NULL;
    new_dtp->hetero_dloop_size  = -1;
    new_dtp->hetero_dloop_depth = -1;

    /* if oldtype is a basic, we build a contiguous dataloop of count = 1 */
    if (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN)
    {
	int oldsize = MPID_Datatype_get_basic_size(oldtype);

	new_dtp->size           = oldsize;
	new_dtp->has_sticky_ub  = 0;
	new_dtp->has_sticky_lb  = 0;
	new_dtp->dataloop_depth = 1;
	new_dtp->true_lb        = 0;
	new_dtp->lb             = lb;
	new_dtp->true_ub        = oldsize;
	new_dtp->ub             = lb + extent;
	new_dtp->extent         = extent;
	new_dtp->alignsize      = oldsize; /* FIXME ??? */
	new_dtp->n_builtin_elements     = 1;
	new_dtp->builtin_element_size   = oldsize;
	new_dtp->is_contig      = (extent == oldsize) ? 1 : 0;
        new_dtp->basic_type         = oldtype;
	new_dtp->max_contig_blocks = 3;  /* lb, data, ub */
    }
    else
    {
	/* user-defined base type */
	MPID_Datatype *old_dtp;

	MPID_Datatype_get_ptr(oldtype, old_dtp);

	new_dtp->size           = old_dtp->size;
	new_dtp->has_sticky_ub  = 0;
	new_dtp->has_sticky_lb  = 0;
	new_dtp->dataloop_depth = old_dtp->dataloop_depth;
	new_dtp->true_lb        = old_dtp->true_lb;
	new_dtp->lb             = lb;
	new_dtp->true_ub        = old_dtp->true_ub;
	new_dtp->ub             = lb + extent;
	new_dtp->extent         = extent;
	new_dtp->alignsize      = old_dtp->alignsize;
	new_dtp->n_builtin_elements     = old_dtp->n_builtin_elements;
	new_dtp->builtin_element_size   = old_dtp->builtin_element_size;
        new_dtp->basic_type         = old_dtp->basic_type;

	new_dtp->is_contig      =
	    (extent == old_dtp->size) ? old_dtp->is_contig : 0;
	new_dtp->max_contig_blocks = old_dtp->max_contig_blocks;
    }

    *newtype_p = new_dtp->handle;

    MPIU_DBG_MSG_P(DATATYPE,VERBOSE,"resized type %x created.", 
		   new_dtp->handle);

    return MPI_SUCCESS;
}
int MPID_Irsend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset,
		MPID_Request ** request)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_ready_send_t * const ready_pkt = &upkt.ready_send;
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq;
    MPIDI_VC_t * vc;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int mpi_errno = MPI_SUCCESS;    
    MPIDI_STATE_DECL(MPID_STATE_MPID_IRSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_IRSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                "rank=%d, tag=%d, context=%d", 
                rank, tag, comm->context_id + context_offset));
    
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_RSEND, &sreq);
	goto fn_exit;
    }
    
    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_RSEND);
    MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_EAGER_MSG);
    
    if (rank == MPI_PROC_NULL)
    {
	MPIU_Object_set_ref(sreq, 1);
        MPID_cc_set(&sreq->cc, 0);
	goto fn_exit;
    }
    
    MPIDI_Comm_get_vc_set_active(comm, rank, &vc);

#ifdef ENABLE_COMM_OVERRIDES
    if (vc->comm_ops && vc->comm_ops->irsend)
    {
	mpi_errno = vc->comm_ops->irsend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
	goto fn_exit;
    }
#endif
    
    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);

    MPIDI_Pkt_init(ready_pkt, MPIDI_CH3_PKT_READY_SEND);
    ready_pkt->match.parts.rank = comm->rank;
    ready_pkt->match.parts.tag = tag;
    ready_pkt->match.parts.context_id = comm->context_id + context_offset;
    ready_pkt->sender_req_id = MPI_REQUEST_NULL;
    ready_pkt->data_sz = data_sz;

    if (data_sz == 0)
    {
	MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"sending zero length message");

	sreq->dev.OnDataAvail = 0;
	
	MPIDI_VC_FAI_send_seqnum(vc, seqnum);
	MPIDI_Pkt_set_seqnum(ready_pkt, seqnum);
	MPIDI_Request_set_seqnum(sreq, seqnum);
	
	MPIU_THREAD_CS_ENTER(CH3COMM,vc);
	mpi_errno = MPIU_CALL(MPIDI_CH3,iSend(vc, sreq, ready_pkt, sizeof(*ready_pkt)));
	MPIU_THREAD_CS_EXIT(CH3COMM,vc);
	/* --BEGIN ERROR HANDLING-- */
	if (mpi_errno != MPI_SUCCESS)
	{
	    MPIU_Object_set_ref(sreq, 0);
	    MPIDI_CH3_Request_destroy(sreq);
	    sreq = NULL;
            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**ch3|eagermsg");
	    goto fn_exit;
	}
	/* --END ERROR HANDLING-- */
	goto fn_exit;
    }
    
    if (dt_contig) {
	mpi_errno = MPIDI_CH3_EagerContigIsend( &sreq, 
						MPIDI_CH3_PKT_READY_SEND,
						(char*)buf + dt_true_lb, 
						data_sz, rank, tag, 
						comm, context_offset );

    }
    else {
	mpi_errno = MPIDI_CH3_EagerNoncontigSend( &sreq, 
                                                  MPIDI_CH3_PKT_READY_SEND,
                                                  buf, count, datatype,
                                                  data_sz, rank, tag, 
                                                  comm, context_offset );
	/* If we're not complete, then add a reference to the datatype */
	if (sreq && sreq->dev.OnDataAvail) {
	    sreq->dev.datatype_ptr = dt_ptr;
	    MPID_Datatype_add_ref(dt_ptr);
	}
    }
 
  fn_exit:
    *request = sreq;

    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,{
	if (sreq != NULL)
	{
	    MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,"request allocated, handle=0x%08x", sreq->handle);
	}
    }
		  );
Beispiel #24
0
/*
 * Carve off size bytes from buffer p and leave the remainder
 * on the avail list.  Handle the head/tail cases. 
 * If there isn't enough left of p, remove the entire segment from
 * the avail list.
 */
static void MPIR_Bsend_take_buffer( MPIR_Bsend_data_t *p, int size  )
{
    MPIR_Bsend_data_t *prev;
    int         alloc_size;

    /* Compute the remaining size.  This must include any padding 
       that must be added to make the new block properly aligned */
    alloc_size = size;
    if (alloc_size & 0x7) 
	alloc_size += (8 - (alloc_size & 0x7));
    /* alloc_size is the amount of space (out of size) that we will 
       allocate for this buffer. */

    MPIU_DBG_MSG_FMT(BSEND,TYPICAL,(MPIU_DBG_FDEST,
                                    "Taking %d bytes from a block with %llu bytes\n",
                                    alloc_size, (unsigned long long) p->total_size ));

    /* Is there enough space left to create a new block? */
    if (alloc_size + (int)BSENDDATA_HEADER_TRUE_SIZE + MIN_BUFFER_BLOCK <= p->size) {
	/* Yes, the available space (p->size) is large enough to 
	   carve out a new block */
	MPIR_Bsend_data_t *newp;
	
	MPIU_DBG_MSG_P(BSEND,TYPICAL,"Breaking block into used and allocated at %p", p );
	newp = (MPIR_Bsend_data_t *)( (char *)p + BSENDDATA_HEADER_TRUE_SIZE + 
				alloc_size );
	newp->total_size = p->total_size - alloc_size - 
	    BSENDDATA_HEADER_TRUE_SIZE;
	newp->size = newp->total_size - BSENDDATA_HEADER_TRUE_SIZE;
	newp->msg.msgbuf = (char *)newp + BSENDDATA_HEADER_TRUE_SIZE;

	/* Insert this new block after p (we'll remove p from the avail list
	   next) */
	newp->next = p->next;
	newp->prev = p;
	if (p->next) {
	    p->next->prev = newp;
	}
	p->next       = newp;
	p->total_size = (char *)newp - (char*)p;
	p->size       = p->total_size - BSENDDATA_HEADER_TRUE_SIZE;

	MPIU_DBG_MSG_FMT(BSEND,TYPICAL,(MPIU_DBG_FDEST,
                                        "broken blocks p (%llu) and new (%llu)\n",
                                        (unsigned long long) p->total_size,
                                        (unsigned long long) newp->total_size ));
    }

    /* Remove p from the avail list and add it to the active list */
    prev = p->prev;
    if (prev) {
	prev->next = p->next;
    }
    else {
	BsendBuffer.avail = p->next;
    }

    if (p->next) {
	p->next->prev = p->prev;
    }
	
    if (BsendBuffer.active) {
	BsendBuffer.active->prev = p;
    }
    p->next	       = BsendBuffer.active;
    p->prev	       = 0;
    BsendBuffer.active = p;

    MPIU_DBG_MSG_P(BSEND,VERBOSE,"segment %p now head of active",p); 
    MPIU_DBG_MSG(BSEND,TYPICAL,"At end of take buffer" );
    MPIU_DBG_STMT(BSEND,TYPICAL,MPIR_Bsend_dump());
}
Beispiel #25
0
int MPID_Issend(const void * buf, int count, MPI_Datatype datatype, int rank, int tag, MPID_Comm * comm, int context_offset,
		MPID_Request ** request)
{
    MPIDI_msg_sz_t data_sz;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPID_Datatype * dt_ptr;
    MPID_Request * sreq;
    MPIDI_VC_t * vc=0;
#if defined(MPID_USE_SEQUENCE_NUMBERS)
    MPID_Seqnum_t seqnum;
#endif    
    int eager_threshold = -1;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_ISSEND);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ISSEND);

    MPIU_DBG_MSG_FMT(CH3_OTHER,VERBOSE,(MPIU_DBG_FDEST,
                 "rank=%d, tag=%d, context=%d", 
                 rank, tag, comm->context_id + context_offset));
    
    if (rank == comm->rank && comm->comm_kind != MPID_INTERCOMM)
    {
	mpi_errno = MPIDI_Isend_self(buf, count, datatype, rank, tag, comm, context_offset, MPIDI_REQUEST_TYPE_SSEND, &sreq);
	goto fn_exit;
    }

    if (rank != MPI_PROC_NULL)
    {
       MPIDI_Comm_get_vc_set_active(comm, rank, &vc);
        /* this needs to come before the sreq is created, since the override */
        /* function is responsible for creating its own request */       
#ifdef ENABLE_COMM_OVERRIDES
       if (vc->comm_ops && vc->comm_ops->issend)
       {
	  mpi_errno = vc->comm_ops->issend( vc, buf, count, datatype, rank, tag, comm, context_offset, &sreq);
	  goto fn_exit;
       }
#endif
    }   
   
    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SSEND);
    
    if (rank == MPI_PROC_NULL)
    {
	MPIU_Object_set_ref(sreq, 1);
        MPID_cc_set(&sreq->cc, 0);
	goto fn_exit;
    }
    
    MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, dt_true_lb);
    
    if (data_sz == 0)
    {
	mpi_errno = MPIDI_CH3_EagerSyncZero( &sreq, rank, tag, comm, 
					     context_offset );
	goto fn_exit;
    }

    MPIDI_CH3_GET_EAGER_THRESHOLD(&eager_threshold, comm, vc);

    if (data_sz + sizeof(MPIDI_CH3_Pkt_eager_sync_send_t) <= eager_threshold)
    {
	mpi_errno = MPIDI_CH3_EagerSyncNoncontigSend( &sreq, buf, count,
                                                      datatype, data_sz, 
                                                      dt_contig, dt_true_lb,
                                                      rank, tag, comm, 
                                                      context_offset );
	/* If we're not complete, then add a reference to the datatype */
	if (sreq && sreq->dev.OnDataAvail) {
	    sreq->dev.datatype_ptr = dt_ptr;
	    MPID_Datatype_add_ref(dt_ptr);
	}
    }
    else
    {
	/* Note that the sreq was created above */
	MPIDI_Request_set_msg_type(sreq, MPIDI_REQUEST_RNDV_MSG);
	mpi_errno = vc->rndvSend_fn( &sreq, buf, count, datatype, dt_contig,
                                     data_sz, dt_true_lb, rank, tag, comm, 
                                     context_offset );
	
	/* FIXME: fill temporary IOV or pack temporary buffer after send to 
	   hide some latency.  This requires synchronization
           because the CTS packet could arrive and be processed before the 
	   above iStartmsg completes (depending on the progress
           engine, threads, etc.). */
	
	if (sreq && dt_ptr != NULL)
	{
	    sreq->dev.datatype_ptr = dt_ptr;
	    MPID_Datatype_add_ref(dt_ptr);
	}
    }

  fn_exit:
    *request = sreq;
    
    MPIU_DBG_STMT(CH3_OTHER,VERBOSE,
    {
	if (sreq != NULL) {
	    MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
			   "request allocated, handle=0x%08x", sreq->handle);
	}
    }
		  )