Example #1
int
ompi_osc_pt2pt_replyreq_recv(ompi_osc_pt2pt_module_t *module,
                             ompi_osc_pt2pt_sendreq_t *sendreq,
                             ompi_osc_pt2pt_reply_header_t *header,
                             void *payload)
{
    int ret = OMPI_SUCCESS;

    /* receive into user buffer */
    if (header->hdr_msg_length > 0) {
        /* short message.  woo! */

        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;

        iov.iov_len = header->hdr_msg_length;
        iov.iov_base = (IOVBASE_TYPE*)payload;
        max_data = iov.iov_len;
        ompi_convertor_unpack(&sendreq->req_origin_convertor,
                              &iov,
                              &iov_count,
                              &max_data );

        OPAL_THREAD_ADD32(&(sendreq->req_module->p2p_num_pending_out), -1);
        ompi_osc_pt2pt_sendreq_free(sendreq);
    } else {
        ompi_osc_pt2pt_longreq_t *longreq;
        ompi_osc_pt2pt_longreq_alloc(&longreq);

        longreq->req_comp_cb = ompi_osc_pt2pt_replyreq_recv_long_cb;
        longreq->req_comp_cbdata = sendreq;
        longreq->req_module = module;

        /* BWB - FIX ME -  George is going to kill me for this */
        ret = mca_pml.pml_irecv(sendreq->req_origin_convertor.pBaseBuf,
                                sendreq->req_origin_convertor.count,
                                sendreq->req_origin_datatype,
                                sendreq->req_target_rank,
                                header->hdr_target_tag,
                                module->p2p_comm,
                                &(longreq->req_pml_req));
        
        /* put the send request in the waiting list */
        OPAL_THREAD_LOCK(&(module->p2p_lock));
        opal_list_append(&(module->p2p_long_msgs),
                         &(longreq->super.super));
        OPAL_THREAD_UNLOCK(&(module->p2p_lock));
    }

    return ret;
}
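
Every example on this page drives ompi_convertor_unpack through the same calling convention: the caller describes the incoming bytes with an iovec array, passes the array length in iov_count, and sets max_data to the number of bytes on offer; on return max_data holds the bytes actually consumed, and the function returns 1 once the whole message has been delivered. The stand-alone sketch below mimics that contract with a plain memcpy (toy_unpack and its parameters are our invention, contiguous data only; the real convertor also handles datatype gaps and heterogeneous architectures):

#include <string.h>
#include <stdint.h>
#include <sys/uio.h>

/* Hypothetical stand-in for ompi_convertor_unpack, contiguous data only:
 * copies bytes from the iovec into user_buf at *pos, honoring the caller's
 * *max_data budget, and reports back how much was consumed.  Returns 1 when
 * the whole message (total bytes) has been delivered, 0 otherwise - the same
 * "done" convention the convertor uses. */
static int toy_unpack(char *user_buf, size_t *pos, size_t total,
                      const struct iovec *iov, uint32_t iov_count,
                      size_t *max_data)
{
    size_t budget = *max_data;       /* in: bytes available on the wire */
    size_t copied = 0;
    uint32_t i;

    for (i = 0; i < iov_count; i++) {
        size_t n = iov[i].iov_len;
        if (n > budget - copied) n = budget - copied;  /* caller's limit */
        if (n > total - *pos)    n = total - *pos;     /* message limit  */
        memcpy(user_buf + *pos, iov[i].iov_base, n);
        *pos   += n;
        copied += n;
    }
    *max_data = copied;              /* out: bytes actually consumed */
    return (*pos == total) ? 1 : 0;  /* 1 once the full message arrived */
}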
Example #2
void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl,
                                          mca_btl_base_tag_t tag,
                                          mca_btl_base_descriptor_t* des,
                                          void* cbdata ) { 
    mca_btl_base_segment_t* segments = des->des_dst;
    mca_pml_ob1_match_hdr_t* hdr = (mca_pml_ob1_match_hdr_t*)segments->seg_addr.pval;
    ompi_communicator_t *comm_ptr;
    mca_pml_ob1_recv_request_t *match = NULL;
    mca_pml_ob1_comm_t *comm;
    mca_pml_ob1_comm_proc_t *proc;
    mca_pml_ob1_recv_frag_t* frag = NULL;
    size_t num_segments = des->des_dst_cnt;
    size_t bytes_received = 0;
    
    if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_OB1_MATCH_HDR_LEN) ) {
        return;
    }
    ob1_hdr_ntoh(((mca_pml_ob1_hdr_t*) hdr), MCA_PML_OB1_HDR_TYPE_MATCH);
    
    /* communicator pointer */
    comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
    if(OPAL_UNLIKELY(NULL == comm_ptr)) {
/* This is a special case. A message can arrive for a
         * communicator that does not yet exist. Instead of matching
         * it now, we temporarily add it to a pending queue in the
         * PML. Later on, when the communicator is completely
         * instantiated, this pending queue will be searched and all
         * matching fragments moved to the right communicator.
         */
        append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending,
                             btl, hdr, segments, num_segments, frag );
        return;
    }
    comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm;
    
    /* source sequence number */
    proc = &comm->procs[hdr->hdr_src];
 
    /* We generate the MSG_ARRIVED event as soon as the PML is aware
     * of a matching fragment arrival, independent of whether it is
     * received in the correct order. This allows the tools to figure
     * out when messages are not received in the correct order (e.g.
     * over multiple network interfaces).
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
 
    /* Get the next expected message sequence number. In a threaded
     * run, lock to make sure that if another thread is processing a
     * frag from the same message, a match is made only once. This
     * also prevents other posted receives (for a pair of endpoints)
     * from being processed concurrently, which could otherwise
     * "lose" the fragment.
     */
    OPAL_THREAD_LOCK(&comm->matching_lock);
    
     /* get sequence number of next message that can be processed */
    if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) ||
                     (opal_list_get_size(&proc->frags_cant_match) > 0 ))) {
        goto slow_path;
    }
    
    /* This is the sequence number we were expecting, so we can try
     * matching it to already posted receives.
     */
    
    /* We're now expecting the next sequence number. */
    proc->expected_sequence++;

    /* We generate the SEARCH_POSTED_QUEUE only when the message is
     * received in the correct sequence. Otherwise, we delay the event
     * generation until we reach the correct sequence number.
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
                            hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
    
    match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, frag);
    
    /* The match is over. We generate the SEARCH_POSTED_Q_END here,
     * before going into mca_pml_ob1_check_cantmatch_for_match, so
     * that the posted-queue search time can be measured separately
     * for each message.
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
    
    /* release matching lock before processing fragment */
    OPAL_THREAD_UNLOCK(&comm->matching_lock);
    
    if(OPAL_LIKELY(match)) {
        bytes_received = segments->seg_len - OMPI_PML_OB1_MATCH_HDR_LEN;
        match->req_recv.req_bytes_packed = bytes_received;
        
        MCA_PML_OB1_RECV_REQUEST_MATCHED(match, hdr);
        if(match->req_bytes_delivered > 0) { 
            struct iovec iov[2];
            uint32_t iov_count = 1;
            
            /*
             *  Make the user buffer accessible (defined) before unpacking.
             */
            MEMCHECKER(
                       memchecker_call(&opal_memchecker_base_mem_defined,
                                       match->req_recv.req_base.req_addr,
                                       match->req_recv.req_base.req_count,
                                       match->req_recv.req_base.req_datatype);
                       );
            
            iov[0].iov_len = bytes_received;
            iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval +
                                              OMPI_PML_OB1_MATCH_HDR_LEN);
            while (iov_count < num_segments) {
                bytes_received += segments[iov_count].seg_len;
                iov[iov_count].iov_len = segments[iov_count].seg_len;
                iov[iov_count].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments[iov_count].seg_addr.pval);
                iov_count++;
            }
            ompi_convertor_unpack( &match->req_recv.req_base.req_convertor,
                                   iov,
                                   &iov_count,
                                   &bytes_received );
            match->req_bytes_received = bytes_received;
            /*
             *  Unpacking finished; make the user buffer inaccessible again.
             */
            MEMCHECKER(
                       memchecker_call(&opal_memchecker_base_mem_noaccess,
                                       match->req_recv.req_base.req_addr,
                                       match->req_recv.req_base.req_count,
                                       match->req_recv.req_base.req_datatype);
                       );
        }

        /* no need to check whether the request is complete - for a match
         * fragment it is, so complete it now.  (The remainder of this
         * function was truncated in the original listing; the ending below
         * is restored as a sketch from the surrounding ob1 sources.) */
        recv_request_pml_complete(match);
    }
    return;

 slow_path:
    OPAL_THREAD_UNLOCK(&comm->matching_lock);
    mca_pml_ob1_recv_frag_match(btl, hdr, segments, num_segments,
                                MCA_PML_OB1_HDR_TYPE_MATCH);
}
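
One detail worth calling out in the fast path above: hdr_seq and expected_sequence are compared after casting both to uint16_t, because sequence numbers live in 16-bit space and must wrap around cleanly. A minimal stand-alone illustration (names are ours, not Open MPI's):

#include <stdint.h>
#include <assert.h>

int main(void)
{
    uint16_t expected_sequence = 65535;   /* about to wrap */
    uint16_t hdr_seq = 65535;

    assert(hdr_seq == expected_sequence); /* match made in 16-bit space */
    expected_sequence++;                  /* wraps to 0, no special-casing */
    assert(expected_sequence == 0);
    return 0;
}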
Example #3
static int
ompi_mtl_portals_get_data(ompi_mtl_portals_event_t *recv_event, 
                          struct ompi_convertor_t *convertor,
                          ompi_mtl_portals_request_t  *ptl_request)
{
    int ret;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    size_t buflen;
    
    if (PTL_IS_SHORT_MSG(recv_event->ev.match_bits)) {
        /* the buffer is sitting in the short message queue */

        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;

        ompi_mtl_portals_recv_short_block_t *block = 
            recv_event->ev.md.user_ptr;

        iov.iov_base = (((char*) recv_event->ev.md.start) + recv_event->ev.offset);
        iov.iov_len = recv_event->ev.mlength;
        max_data = iov.iov_len;

        /* see if this message filled the receive block */
        if (recv_event->ev.md.length - (recv_event->ev.offset + 
                                        recv_event->ev.mlength) <
            recv_event->ev.md.max_size) {
            block->full = true;
        }

        /* pull out the data */
        if (iov.iov_len > 0) {
            ompi_convertor_unpack(convertor, &iov, &iov_count,
                                  &max_data );
        }

        /* if synchronous, return an ack */
        if (PTL_IS_SYNC_MSG(recv_event->ev)) {
            md.length = 0;
            md.start = (((char*) recv_event->ev.md.start) + recv_event->ev.offset);
            md.threshold = 1; /* send */
            md.options = PTL_MD_EVENT_START_DISABLE;
            md.user_ptr = NULL;
            md.eq_handle = ompi_mtl_portals.ptl_eq_h;

            ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md,
                            PTL_UNLINK, &md_h);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr)," Error returned from PtlMDBind.  Error code - %d \n",ret);
                abort();
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                                 "acking recv: 0x%016llx\n", 
                                 recv_event->ev.match_bits));

            ret = PtlPut(md_h,
                         PTL_NO_ACK_REQ,
                         recv_event->ev.initiator,
                         OMPI_MTL_PORTALS_ACK_TABLE_ID,
                         0,
                         recv_event->ev.hdr_data,
                         0,
                         0);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr)," Error returned from PtlPut.  Error code - %d \n",ret);
                abort();
            }
        }

        /* finished with our buffer space */
        ompi_mtl_portals_return_block_part(&ompi_mtl_portals, block);

        ompi_convertor_get_packed_size(convertor, &buflen);

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG = 
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR = 
            (recv_event->ev.rlength > buflen) ?
            MPI_ERR_TRUNCATE : MPI_SUCCESS;
        ptl_request->super.ompi_req->req_status._count = 
            recv_event->ev.mlength;

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "recv complete: 0x%016llx\n", 
                             recv_event->ev.match_bits));
        
        ptl_request->super.completion_callback(&ptl_request->super);

    } else {
        ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
                                         &ptl_request->free_after);
        if (OMPI_SUCCESS != ret) {
            opal_output(fileno(stderr)," Error returned from ompi_mtl_datatype_recv_buf.  Error code - %d \n",ret);
            abort();
        }
        md.length = (recv_event->ev.rlength > buflen) ? buflen : recv_event->ev.rlength;
        md.threshold = 2; /* send and get */
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        /* retain because it's unclear how many events we'll get here.
           Some implementations give just the REPLY, others give SEND
           and REPLY */
        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md,
                        PTL_RETAIN, &md_h);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr)," Error returned from PtlMDBind.  Error code - %d \n",ret);
            abort();
        }

        ptl_request->event_callback = ompi_mtl_portals_recv_progress;

        ret = PtlGet(md_h, 
                     recv_event->ev.initiator, 
                     OMPI_MTL_PORTALS_READ_TABLE_ID,
                     0, 
                     recv_event->ev.hdr_data,
                     0);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr)," Error returned from PtlGet.  Error code - %d \n",ret);
            abort();
        }

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG = 
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR = 
            (recv_event->ev.rlength > buflen) ?
            MPI_ERR_TRUNCATE : MPI_SUCCESS;
    }

    return OMPI_SUCCESS;
}
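
Both branches above apply the same truncation rule: when the sender's payload (rlength) exceeds what the posted receive can hold (buflen), only buflen bytes are moved (see md.length in the long-message branch) and the status reports MPI_ERR_TRUNCATE. A hedged restatement with hypothetical helper names:

#include <mpi.h>
#include <stddef.h>

/* hypothetical helper restating the status logic from the example above */
static int recv_status_error(size_t rlength, size_t buflen)
{
    return (rlength > buflen) ? MPI_ERR_TRUNCATE : MPI_SUCCESS;
}

/* hypothetical helper mirroring how md.length is clamped above */
static size_t recv_bytes_to_move(size_t rlength, size_t buflen)
{
    return (rlength > buflen) ? buflen : rlength;
}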
Example #4
static int local_copy_with_convertor( ompi_datatype_t* pdt, int count, int chunk )
{
    MPI_Aint extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    ompi_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    int32_t length = 0, done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    ompi_ddt_type_extent( pdt, &extent );

    pdst  = malloc( extent * count );
    psrc  = malloc( extent * count );
    ptemp = malloc( chunk );

    {
        int i;
        for( i = 0; i < (count * extent); i++ )
            ((char*)psrc)[i] = i % 128 + 32;
    }
    memset( pdst, 0, count * extent );

    send_convertor = ompi_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != ompi_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    recv_convertor = ompi_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != ompi_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish at exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n",
                    (done1 ? "finished" : "not finished"),
                    (done2 ? "finished" : "not finished") );
        }
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = ompi_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = ompi_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying same data-type using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );
 clean_and_return:
    if( NULL != send_convertor ) OBJ_RELEASE( send_convertor );
    if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor );

    if( NULL != pdst ) free( pdst );
    if( NULL != psrc ) free( psrc );
    if( NULL != ptemp ) free( ptemp );
    return OMPI_SUCCESS;
}
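
A hedged usage sketch for this benchmark routine, assuming the test-harness context it comes from (upper_matrix is the helper used in Example #5 below):

    /* pipe one upper-triangular matrix through the convertors in 4 KB chunks */
    ompi_datatype_t *pdt = upper_matrix( 100 );
    local_copy_with_convertor( pdt, 1, 4096 );
    ompi_ddt_destroy( &pdt );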
Example #5
static int test_upper( unsigned int length )
{
    double *mat1, *mat2, *inbuf;
    ompi_datatype_t *pdt;
    ompi_convertor_t * pConv;
    char *ptr;
    int rc;
    unsigned int i, j, iov_count, split_chunk, total_length;
    size_t max_data;
    struct iovec a;
    TIMER_DATA_TYPE start, end;
    long total_time;

    printf( "test upper matrix\n" );
    pdt = upper_matrix( length );
    /*dt_dump( pdt );*/

    mat1 = malloc( length * length * sizeof(double) );
    init_random_upper_matrix( length, mat1 );
    mat2 = calloc( length * length, sizeof(double) );

    total_length = length * (length + 1) * ( sizeof(double) / 2);
    inbuf = (double*)malloc( total_length );
    ptr = (char*)inbuf;
    /* copy upper matrix in the array simulating the input buffer */
    for( i = 0; i < length; i++ ) {
        uint32_t pos = i * length;
        for( j = i; j < length; j++, pos++ ) {
            *inbuf = mat1[pos];
            inbuf++;
        }
    }
    inbuf = (double*)ptr;
    pConv = ompi_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != ompi_convertor_prepare_for_recv( pConv, pdt, 1, mat2 ) ) {
        printf( "Cannot attach the datatype to a convertor\n" );
        return OMPI_ERROR;
    }

    GET_TIME( start );
    split_chunk = (length + 1) * sizeof(double);
    /*    split_chunk = (total_length + 1) * sizeof(double); */
    for( i = total_length; i > 0; ) {
        if( i <= split_chunk ) {  /* equal test just to be able to set a breakpoint */
            split_chunk = i;
        }
        a.iov_base = ptr;
        a.iov_len = split_chunk;
        iov_count = 1;
        max_data = split_chunk;
        ompi_convertor_unpack( pConv, &a, &iov_count, &max_data );
        ptr += max_data;
        i -= max_data;
        if( mat2[0] != inbuf[0] ) assert(0);
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "complete unpacking in %ld microsec\n", total_time );
    /*    printf( "conversion done in %ld microsec\n", conversion_elapsed ); */
    /*    printf( "stack management in %ld microsec\n", total_time - conversion_elapsed ); */
    free( inbuf );
    rc = check_diag_matrix( length, mat1, mat2 );
    free( mat1 );
    free( mat2 );

    /* test the automatic destruction of the data */
    ompi_ddt_destroy( &pdt ); assert( pdt == NULL );

    OBJ_RELEASE( pConv );
    return rc;
}
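
The buffer size computed at the top of this test leans on a small identity: the packed upper triangle of an N x N matrix holds N*(N+1)/2 elements, and because sizeof(double) is even, length * (length + 1) * (sizeof(double) / 2) equals that element count times sizeof(double). A stand-alone check:

#include <assert.h>
#include <stdio.h>

int main(void)
{
    unsigned int length = 4;
    unsigned int total_length = length * (length + 1) * ( sizeof(double) / 2 );

    assert( total_length == (length * (length + 1) / 2) * sizeof(double) );
    printf( "%u bytes for the packed upper triangle\n", total_length ); /* 80 */
    return 0;
}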
Example #6
int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
                         int dest, int sendtag, int source, int recvtag,
                         MPI_Comm comm, MPI_Status *status)

{
    int rc = MPI_SUCCESS;

    if ( MPI_PARAM_CHECK ) {
        rc = MPI_SUCCESS;
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        OMPI_CHECK_DATATYPE_FOR_RECV(rc, datatype, count);

        if (ompi_comm_invalid(comm)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME);
        } else if (dest != MPI_PROC_NULL && ompi_comm_peer_invalid(comm, dest)) {
            rc = MPI_ERR_RANK;
        } else if (sendtag < 0 || sendtag > mca_pml.pml_max_tag) {
            rc = MPI_ERR_TAG;
        } else if (source != MPI_PROC_NULL && source != MPI_ANY_SOURCE && ompi_comm_peer_invalid(comm, source)) {
            rc = MPI_ERR_RANK;
        } else if (((recvtag < 0) && (recvtag !=  MPI_ANY_TAG)) || (recvtag > mca_pml.pml_max_tag)) {
            rc = MPI_ERR_TAG;
        }
        OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME);
    }
 
    /* simple case */
    if ( source == MPI_PROC_NULL || dest == MPI_PROC_NULL || count == 0 ) {
        return MPI_Sendrecv(buf,count,datatype,dest,sendtag,buf,count,datatype,source,recvtag,comm,status);
    } else {

        ompi_convertor_t convertor;
        struct iovec iov;
        unsigned char recv_data[2048];
        size_t packed_size, max_data;
        uint32_t iov_count;
        ompi_status_public_t recv_status;
        ompi_proc_t* proc = ompi_comm_peer_lookup(comm,dest);
        if(proc == NULL) {
            rc = MPI_ERR_RANK;
            OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
        }

        /* initialize convertor to unpack recv buffer */
        OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
        ompi_convertor_copy_and_prepare_for_recv( proc->proc_convertor, datatype,
                                                  count, buf, 0, &convertor );

        /* setup a buffer for recv */
        ompi_convertor_get_packed_size( &convertor, &packed_size );
        if( packed_size > sizeof(recv_data) ) {
            rc = MPI_Alloc_mem(packed_size, MPI_INFO_NULL, &iov.iov_base);
            if(OMPI_SUCCESS != rc) {
                OMPI_ERRHANDLER_RETURN(OMPI_ERR_OUT_OF_RESOURCE, comm, MPI_ERR_BUFFER, FUNC_NAME);
            }
        } else {
            iov.iov_base = (caddr_t)recv_data;
        }

        /* recv into temporary buffer */
        rc = MPI_Sendrecv( buf, count, datatype, dest, sendtag, iov.iov_base, packed_size, 
                           MPI_BYTE, source, recvtag, comm, &recv_status );
        if (rc != MPI_SUCCESS) {
            if(packed_size > sizeof(recv_data))
                MPI_Free_mem(iov.iov_base);
            OBJ_DESTRUCT(&convertor);
            OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
        }

        /* unpack into the user's buffer */
        iov.iov_len = recv_status._count;
        iov_count = 1;
        max_data = recv_status._count;
        ompi_convertor_unpack(&convertor, &iov, &iov_count, &max_data );

        /* return status to user */
        if(status != MPI_STATUS_IGNORE) {
            *status = recv_status;
        }

        /* release resources */
        if(packed_size > sizeof(recv_data)) {
            MPI_Free_mem(iov.iov_base);
        }
        OBJ_DESTRUCT(&convertor);
        return MPI_SUCCESS;
    }
}
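
For reference, a minimal caller of this routine: each rank rotates its value one step around a ring, reusing a single buffer for both send and receive, which is exactly the pack-into-temporary-then-unpack dance the implementation above performs under the hood:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size, value;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    value = rank;
    MPI_Sendrecv_replace(&value, 1, MPI_INT,
                         (rank + 1) % size, 0,          /* dest, sendtag   */
                         (rank + size - 1) % size, 0,   /* source, recvtag */
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    printf("rank %d now holds %d\n", rank, value);  /* its left neighbor's rank */
    MPI_Finalize();
    return 0;
}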
Example #7
int
ompi_osc_pt2pt_sendreq_recv_put(ompi_osc_pt2pt_module_t *module,
                                ompi_osc_pt2pt_send_header_t *header,
                                void *inbuf)
{
    int ret = OMPI_SUCCESS;
    void *target = (unsigned char*) module->p2p_win->w_baseptr + 
        (header->hdr_target_disp * module->p2p_win->w_disp_unit);    
    ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
    struct ompi_datatype_t *datatype = 
        ompi_osc_pt2pt_datatype_create(proc, &inbuf);

    if (header->hdr_msg_length > 0) {
        ompi_convertor_t convertor;
        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;
        ompi_proc_t *proc;

        /* create convertor */
        OBJ_CONSTRUCT(&convertor, ompi_convertor_t);

        /* initialize convertor */
        proc = ompi_comm_peer_lookup(module->p2p_comm, header->hdr_origin);
        ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
                                                 datatype,
                                                 header->hdr_target_count,
                                                 target,
                                                 0,
                                                 &convertor);
        iov.iov_len = header->hdr_msg_length;
        iov.iov_base = (IOVBASE_TYPE*)inbuf;
        max_data = iov.iov_len;
        ompi_convertor_unpack(&convertor, 
                              &iov,
                              &iov_count,
                              &max_data );
        OBJ_DESTRUCT(&convertor);
        OBJ_RELEASE(datatype);
        OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), -1);
    } else {
        ompi_osc_pt2pt_longreq_t *longreq;
        ompi_osc_pt2pt_longreq_alloc(&longreq);

        longreq->req_comp_cb = ompi_osc_pt2pt_sendreq_recv_put_long_cb;
        longreq->req_comp_cbdata = NULL;
        longreq->req_datatype = datatype;
        longreq->req_module = module;

        ret = mca_pml.pml_irecv(target,
                                header->hdr_target_count,
                                datatype,
                                header->hdr_origin,
                                header->hdr_origin_tag,
                                module->p2p_comm,
                                &(longreq->req_pml_req));

        /* put the long receive request in the waiting list */
        OPAL_THREAD_LOCK(&(module->p2p_lock));
        opal_list_append(&(module->p2p_long_msgs),
                         &(longreq->super.super));
        OPAL_THREAD_UNLOCK(&(module->p2p_lock));
    }

    return ret;
}
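
The target address computed at the top of this function follows the usual MPI one-sided addressing rule: a window exposes a base pointer and a displacement unit, and a remote displacement resolves to base + disp * disp_unit. A hedged restatement (osc_target_addr is our name, not Open MPI's):

#include <stddef.h>

/* hypothetical helper restating the w_baseptr / w_disp_unit arithmetic above */
static void *osc_target_addr(void *w_baseptr, ptrdiff_t target_disp, int w_disp_unit)
{
    return (unsigned char *)w_baseptr + target_disp * (ptrdiff_t)w_disp_unit;
}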