int
ompi_osc_pt2pt_replyreq_recv(ompi_osc_pt2pt_module_t *module,
                             ompi_osc_pt2pt_sendreq_t *sendreq,
                             ompi_osc_pt2pt_reply_header_t *header,
                             void *payload)
{
    int ret = OMPI_SUCCESS;

    /* receive into user buffer */
    if (header->hdr_msg_length > 0) {
        /* short message. woo! */
        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;

        iov.iov_len = header->hdr_msg_length;
        iov.iov_base = (IOVBASE_TYPE*)payload;
        max_data = iov.iov_len;
        ompi_convertor_unpack(&sendreq->req_origin_convertor,
                              &iov, &iov_count, &max_data);

        OPAL_THREAD_ADD32(&(sendreq->req_module->p2p_num_pending_out), -1);
        ompi_osc_pt2pt_sendreq_free(sendreq);
    } else {
        ompi_osc_pt2pt_longreq_t *longreq;
        ompi_osc_pt2pt_longreq_alloc(&longreq);

        longreq->req_comp_cb = ompi_osc_pt2pt_replyreq_recv_long_cb;
        longreq->req_comp_cbdata = sendreq;
        longreq->req_module = module;

        /* BWB - FIX ME - George is going to kill me for this */
        ret = mca_pml.pml_irecv(sendreq->req_origin_convertor.pBaseBuf,
                                sendreq->req_origin_convertor.count,
                                sendreq->req_origin_datatype,
                                sendreq->req_target_rank,
                                header->hdr_target_tag,
                                module->p2p_comm,
                                &(longreq->req_pml_req));

        /* put the send request in the waiting list */
        OPAL_THREAD_LOCK(&(module->p2p_lock));
        opal_list_append(&(module->p2p_long_msgs), &(longreq->super.super));
        OPAL_THREAD_UNLOCK(&(module->p2p_lock));
    }

    return ret;
}
void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl,
                                          mca_btl_base_tag_t tag,
                                          mca_btl_base_descriptor_t* des,
                                          void* cbdata)
{
    mca_btl_base_segment_t* segments = des->des_dst;
    mca_pml_ob1_match_hdr_t* hdr = (mca_pml_ob1_match_hdr_t*)segments->seg_addr.pval;
    ompi_communicator_t *comm_ptr;
    mca_pml_ob1_recv_request_t *match = NULL;
    mca_pml_ob1_comm_t *comm;
    mca_pml_ob1_comm_proc_t *proc;
    mca_pml_ob1_recv_frag_t* frag = NULL;
    size_t num_segments = des->des_dst_cnt;
    size_t bytes_received = 0;

    if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_OB1_MATCH_HDR_LEN) ) {
        return;
    }
    ob1_hdr_ntoh(((mca_pml_ob1_hdr_t*) hdr), MCA_PML_OB1_HDR_TYPE_MATCH);

    /* communicator pointer */
    comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
    if(OPAL_UNLIKELY(NULL == comm_ptr)) {
        /* This is a special case. A message can arrive for a communicator
         * that does not yet exist. Instead of matching it, we temporarily
         * add it to a pending queue in the PML. Later on, when the
         * communicator is completely instantiated, this pending queue is
         * searched and all matching fragments are moved to the right
         * communicator.
         */
        append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending,
                             btl, hdr, segments, num_segments, frag );
        return;
    }
    comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm;

    /* source sequence number */
    proc = &comm->procs[hdr->hdr_src];

    /* We generate the MSG_ARRIVED event as soon as the PML is aware of a
     * matching fragment arrival, whether or not it is received in the
     * correct order. This allows the tools to figure out when messages
     * are not received in the correct order (e.g. with multiple network
     * interfaces).
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);

    /* Get the next expected message sequence number. In a threaded run,
     * lock to make sure that if another thread is processing a fragment
     * from the same message, a match is made only once. This also
     * prevents other posted receives (for a pair of endpoints) from
     * being processed, and potentially "losing" the fragment.
     */
    OPAL_THREAD_LOCK(&comm->matching_lock);

    /* get the sequence number of the next message that can be processed */
    if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) ||
                     (opal_list_get_size(&proc->frags_cant_match) > 0))) {
        goto slow_path;
    }

    /* This is the sequence number we were expecting, so we can try
     * matching it to already posted receives.
     */

    /* We're now expecting the next sequence number. */
    proc->expected_sequence++;

    /* We generate the SEARCH_POSTED_QUEUE event only when the message is
     * received in the correct sequence. Otherwise, we delay the event
     * generation until we reach the correct sequence number.
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);

    match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, frag);

    /* The match is over. We generate the SEARCH_POSTED_Q_END here, before
     * going into mca_pml_ob1_check_cantmatch_for_match, so that we can
     * separate out the posted-queue search time for all messages.
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);

    /* release the matching lock before processing the fragment */
    OPAL_THREAD_UNLOCK(&comm->matching_lock);

    if(OPAL_LIKELY(match)) {
        bytes_received = segments->seg_len - OMPI_PML_OB1_MATCH_HDR_LEN;
        match->req_recv.req_bytes_packed = bytes_received;

        MCA_PML_OB1_RECV_REQUEST_MATCHED(match, hdr);
        if(match->req_bytes_delivered > 0) {
            struct iovec iov[2];
            uint32_t iov_count = 1;

            /* make the user buffer accessible (defined) before unpacking */
            MEMCHECKER(
                memchecker_call(&opal_memchecker_base_mem_defined,
                                match->req_recv.req_base.req_addr,
                                match->req_recv.req_base.req_count,
                                match->req_recv.req_base.req_datatype);
            );

            iov[0].iov_len = bytes_received;
            iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval +
                                              OMPI_PML_OB1_MATCH_HDR_LEN);
            while (iov_count < num_segments) {
                bytes_received += segments[iov_count].seg_len;
                iov[iov_count].iov_len = segments[iov_count].seg_len;
                iov[iov_count].iov_base =
                    (IOVBASE_TYPE*)((unsigned char*)segments[iov_count].seg_addr.pval);
                iov_count++;
            }
            ompi_convertor_unpack( &match->req_recv.req_base.req_convertor,
                                   iov, &iov_count, &bytes_received );
            match->req_bytes_received = bytes_received;

            /* unpacking finished; make the user buffer inaccessible again */
            MEMCHECKER(
                memchecker_call(&opal_memchecker_base_mem_noaccess,
                                match->req_recv.req_base.req_addr,
                                match->req_recv.req_base.req_count,
                                match->req_recv.req_base.req_datatype);
            );
        }
        /* ... the remainder of the function, including the slow_path
         * label targeted by the goto above, is not part of this excerpt ... */
static int
ompi_mtl_portals_get_data(ompi_mtl_portals_event_t *recv_event,
                          struct ompi_convertor_t *convertor,
                          ompi_mtl_portals_request_t *ptl_request)
{
    int ret;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    size_t buflen;

    if (PTL_IS_SHORT_MSG(recv_event->ev.match_bits)) {
        /* the buffer is sitting in the short message queue */
        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;

        ompi_mtl_portals_recv_short_block_t *block = recv_event->ev.md.user_ptr;

        iov.iov_base = (((char*) recv_event->ev.md.start) + recv_event->ev.offset);
        iov.iov_len = recv_event->ev.mlength;
        max_data = iov.iov_len;

        /* see if this message filled the receive block */
        if (recv_event->ev.md.length -
            (recv_event->ev.offset + recv_event->ev.mlength) <
            recv_event->ev.md.max_size) {
            block->full = true;
        }

        /* pull out the data */
        if (iov.iov_len > 0) {
            ompi_convertor_unpack(convertor, &iov, &iov_count, &max_data);
        }

        /* if synchronous, return an ack */
        if (PTL_IS_SYNC_MSG(recv_event->ev)) {
            md.length = 0;
            md.start = (((char*) recv_event->ev.md.start) + recv_event->ev.offset);
            md.threshold = 1; /* send */
            md.options = PTL_MD_EVENT_START_DISABLE;
            md.user_ptr = NULL;
            md.eq_handle = ompi_mtl_portals.ptl_eq_h;

            ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md, PTL_UNLINK, &md_h);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr),
                            " Error returned from PtlMDBind. Error code - %d \n", ret);
                abort();
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                                 "acking recv: 0x%016llx\n",
                                 recv_event->ev.match_bits));

            ret = PtlPut(md_h, PTL_NO_ACK_REQ, recv_event->ev.initiator,
                         OMPI_MTL_PORTALS_ACK_TABLE_ID, 0,
                         recv_event->ev.hdr_data, 0, 0);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr),
                            " Error returned from PtlPut. Error code - %d \n", ret);
                abort();
            }
        }

        /* finished with our buffer space */
        ompi_mtl_portals_return_block_part(&ompi_mtl_portals, block);

        ompi_convertor_get_packed_size(convertor, &buflen);

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG =
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR =
            (recv_event->ev.rlength > buflen) ? MPI_ERR_TRUNCATE : MPI_SUCCESS;
        ptl_request->super.ompi_req->req_status._count = recv_event->ev.mlength;

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "recv complete: 0x%016llx\n",
                             recv_event->ev.match_bits));

        ptl_request->super.completion_callback(&ptl_request->super);
    } else {
        ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
                                         &ptl_request->free_after);
        if (OMPI_SUCCESS != ret) {
            opal_output(fileno(stderr),
                        " Error returned from ompi_mtl_datatype_recv_buf. Error code - %d \n", ret);
            abort();
        }

        md.length = (recv_event->ev.rlength > buflen) ? buflen : recv_event->ev.rlength;
        md.threshold = 2; /* send and get */
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        /* retain because it's unclear how many events we'll get here.
           Some implementations give just the REPLY, others give SEND
           and REPLY. */
        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md, PTL_RETAIN, &md_h);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr),
                        " Error returned from PtlMDBind. Error code - %d \n", ret);
            abort();
        }

        ptl_request->event_callback = ompi_mtl_portals_recv_progress;

        ret = PtlGet(md_h, recv_event->ev.initiator,
                     OMPI_MTL_PORTALS_READ_TABLE_ID, 0,
                     recv_event->ev.hdr_data, 0);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr),
                        " Error returned from PtlGet. Error code - %d \n", ret);
            abort();
        }

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG =
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR =
            (recv_event->ev.rlength > buflen) ? MPI_ERR_TRUNCATE : MPI_SUCCESS;
    }

    return OMPI_SUCCESS;
}
static int local_copy_with_convertor( ompi_datatype_t* pdt, int count, int chunk )
{
    MPI_Aint extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    ompi_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    int32_t length = 0, done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    ompi_ddt_type_extent( pdt, &extent );

    pdst  = malloc( extent * count );
    psrc  = malloc( extent * count );
    ptemp = malloc( chunk );

    {
        int i;
        for( i = 0; i < (count * extent); i++ )
            ((char*)psrc)[i] = i % 128 + 32;
    }
    memset( pdst, 0, count * extent );

    send_convertor = ompi_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != ompi_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed?\n" );
        goto clean_and_return;
    }
    recv_convertor = ompi_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != ompi_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish at exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n",
                    (done1 ? "finished" : "not finished"),
                    (done2 ? "finished" : "not finished") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = ompi_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = ompi_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying same data-type using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );

 clean_and_return:
    if( NULL != send_convertor ) OBJ_RELEASE( send_convertor );
    if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor );

    if( NULL != pdst ) free( pdst );
    if( NULL != psrc ) free( psrc );
    if( NULL != ptemp ) free( ptemp );

    return OMPI_SUCCESS;
}
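/*
 * The loop in local_copy_with_convertor() pipelines the whole datatype
 * through the fixed-size bounce buffer `ptemp`, `chunk` bytes per
 * iteration. The following standalone sketch reduces that pattern to
 * plain C, with memcpy standing in for ompi_convertor_pack/unpack.
 * Everything in it is illustrative; it is not OMPI code.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* stream `total` bytes from src to dst through a small bounce buffer,
 * mirroring how the loop above pipelines pack and unpack through ptemp */
static void chunked_copy(char *dst, const char *src, size_t total, size_t chunk)
{
    char *bounce = malloc(chunk);
    size_t done = 0;
    while (done < total) {
        size_t len = (total - done < chunk) ? total - done : chunk;
        memcpy(bounce, src + done, len);  /* stands in for ompi_convertor_pack */
        memcpy(dst + done, bounce, len);  /* stands in for ompi_convertor_unpack */
        done += len;
    }
    free(bounce);
}

int main(void)
{
    char src[100], dst[100];
    int i;
    for (i = 0; i < 100; i++) src[i] = i % 128 + 32;  /* same fill as above */
    chunked_copy(dst, src, sizeof src, 16);
    printf("copies %s\n", memcmp(src, dst, sizeof src) ? "differ" : "match");
    return 0;
}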
static int test_upper( unsigned int length )
{
    double *mat1, *mat2, *inbuf;
    ompi_datatype_t *pdt;
    ompi_convertor_t *pConv;
    char *ptr;
    int rc;
    unsigned int i, j, iov_count, split_chunk, total_length;
    size_t max_data;
    struct iovec a;
    TIMER_DATA_TYPE start, end;
    long total_time;

    printf( "test upper matrix\n" );
    pdt = upper_matrix( length );
    /*dt_dump( pdt );*/

    mat1 = malloc( length * length * sizeof(double) );
    init_random_upper_matrix( length, mat1 );
    mat2 = calloc( length * length, sizeof(double) );

    total_length = length * (length + 1) / 2 * sizeof(double);
    inbuf = (double*)malloc( total_length );
    ptr = (char*)inbuf;
    /* copy the upper matrix into the array simulating the input buffer */
    for( i = 0; i < length; i++ ) {
        uint32_t pos = i * length;
        for( j = i; j < length; j++, pos++ ) {
            *inbuf = mat1[pos];
            inbuf++;
        }
    }
    inbuf = (double*)ptr;

    pConv = ompi_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != ompi_convertor_prepare_for_recv( pConv, pdt, 1, mat2 ) ) {
        printf( "Cannot attach the datatype to a convertor\n" );
        return OMPI_ERROR;
    }

    GET_TIME( start );
    split_chunk = (length + 1) * sizeof(double);
    /* split_chunk = (total_length + 1) * sizeof(double); */
    for( i = total_length; i > 0; ) {
        if( i <= split_chunk ) {  /* equal test just to be able to set a breakpoint */
            split_chunk = i;
        }
        a.iov_base = ptr;
        a.iov_len = split_chunk;
        iov_count = 1;
        max_data = split_chunk;
        ompi_convertor_unpack( pConv, &a, &iov_count, &max_data );
        ptr += max_data;
        i -= max_data;
        if( mat2[0] != inbuf[0] ) assert(0);
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "complete unpacking in %ld microsec\n", total_time );
    /* printf( "conversion done in %ld microsec\n", conversion_elapsed ); */
    /* printf( "stack management in %ld microsec\n", total_time - conversion_elapsed ); */

    free( inbuf );
    rc = check_diag_matrix( length, mat1, mat2 );
    free( mat1 );
    free( mat2 );

    /* test the automatic destruction of the datatype */
    ompi_ddt_destroy( &pdt );
    assert( pdt == NULL );

    OBJ_RELEASE( pConv );
    return rc;
}
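/*
 * The input buffer built in test_upper() is the row-major packed form of
 * an upper-triangular matrix: row i contributes (length - i) elements,
 * for a total of length * (length + 1) / 2 doubles, which is what the
 * total_length computation above encodes. A standalone check of that
 * arithmetic, illustrative only:
 */
#include <stdio.h>
#include <assert.h>

int main(void)
{
    unsigned int length = 5, count = 0, i, j;

    /* count the elements the packing loop in test_upper() would copy */
    for (i = 0; i < length; i++)
        for (j = i; j < length; j++)
            count++;

    assert(count == length * (length + 1) / 2);  /* 15 for length == 5 */
    printf("%u packed elements for a %ux%u upper matrix\n", count, length, length);
    return 0;
}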
int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
                         int dest, int sendtag, int source, int recvtag,
                         MPI_Comm comm, MPI_Status *status)
{
    int rc = MPI_SUCCESS;

    if ( MPI_PARAM_CHECK ) {
        rc = MPI_SUCCESS;
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        OMPI_CHECK_DATATYPE_FOR_RECV(rc, datatype, count);

        if (ompi_comm_invalid(comm)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME);
        } else if (dest != MPI_PROC_NULL && ompi_comm_peer_invalid(comm, dest)) {
            rc = MPI_ERR_RANK;
        } else if (sendtag < 0 || sendtag > mca_pml.pml_max_tag) {
            rc = MPI_ERR_TAG;
        } else if (source != MPI_PROC_NULL && source != MPI_ANY_SOURCE &&
                   ompi_comm_peer_invalid(comm, source)) {
            rc = MPI_ERR_RANK;
        } else if (((recvtag < 0) && (recvtag != MPI_ANY_TAG)) ||
                   (recvtag > mca_pml.pml_max_tag)) {
            rc = MPI_ERR_TAG;
        }

        OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME);
    }

    /* simple case */
    if ( source == MPI_PROC_NULL || dest == MPI_PROC_NULL || count == 0 ) {
        return MPI_Sendrecv(buf, count, datatype, dest, sendtag,
                            buf, count, datatype, source, recvtag,
                            comm, status);
    } else {
        ompi_convertor_t convertor;
        struct iovec iov;
        unsigned char recv_data[2048];
        size_t packed_size, max_data;
        uint32_t iov_count;
        ompi_status_public_t recv_status;
        ompi_proc_t* proc = ompi_comm_peer_lookup(comm, dest);
        if(proc == NULL) {
            rc = MPI_ERR_RANK;
            OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
        }

        /* initialize the convertor to unpack the recv buffer */
        OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
        ompi_convertor_copy_and_prepare_for_recv( proc->proc_convertor, datatype,
                                                  count, buf, 0, &convertor );

        /* set up a buffer for the recv */
        ompi_convertor_get_packed_size( &convertor, &packed_size );
        if( packed_size > sizeof(recv_data) ) {
            rc = MPI_Alloc_mem(packed_size, MPI_INFO_NULL, &iov.iov_base);
            if(OMPI_SUCCESS != rc) {
                OMPI_ERRHANDLER_RETURN(OMPI_ERR_OUT_OF_RESOURCE, comm,
                                       MPI_ERR_BUFFER, FUNC_NAME);
            }
        } else {
            iov.iov_base = (caddr_t)recv_data;
        }

        /* recv into the temporary buffer */
        rc = MPI_Sendrecv( buf, count, datatype, dest, sendtag,
                           iov.iov_base, packed_size, MPI_BYTE, source, recvtag,
                           comm, &recv_status );
        if (rc != MPI_SUCCESS) {
            if(packed_size > sizeof(recv_data))
                MPI_Free_mem(iov.iov_base);
            OBJ_DESTRUCT(&convertor);
            OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
        }

        /* unpack into the user's buffer */
        iov.iov_len = recv_status._count;
        iov_count = 1;
        max_data = recv_status._count;
        ompi_convertor_unpack(&convertor, &iov, &iov_count, &max_data);

        /* return the status to the user */
        if(status != MPI_STATUS_IGNORE) {
            *status = recv_status;
        }

        /* release resources */
        if(packed_size > sizeof(recv_data)) {
            MPI_Free_mem(iov.iov_base);
        }
        OBJ_DESTRUCT(&convertor);

        return MPI_SUCCESS;
    }
}
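/*
 * A minimal caller for MPI_Sendrecv_replace(): each rank swaps a buffer
 * in place with a partner rank. Plain standard MPI; the pairing scheme
 * below is hypothetical and assumes an even number of ranks.
 */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size, i, partner, buf[4];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size % 2 != 0) {  /* the pairing below needs an even rank count */
        MPI_Finalize();
        return 0;
    }

    /* pair rank 0 with 1, 2 with 3, ... */
    partner = (rank % 2 == 0) ? rank + 1 : rank - 1;
    for (i = 0; i < 4; i++) buf[i] = rank * 100 + i;

    /* the same buffer is both sent and then overwritten with the received
     * data, which is exactly the case the function above implements */
    MPI_Sendrecv_replace(buf, 4, MPI_INT, partner, 0, partner, 0,
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    printf("rank %d now holds rank %d's data: %d ...\n", rank, partner, buf[0]);
    MPI_Finalize();
    return 0;
}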
int
ompi_osc_pt2pt_sendreq_recv_put(ompi_osc_pt2pt_module_t *module,
                                ompi_osc_pt2pt_send_header_t *header,
                                void *inbuf)
{
    int ret = OMPI_SUCCESS;
    void *target = (unsigned char*) module->p2p_win->w_baseptr +
        (header->hdr_target_disp * module->p2p_win->w_disp_unit);
    ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
    struct ompi_datatype_t *datatype =
        ompi_osc_pt2pt_datatype_create(proc, &inbuf);

    if (header->hdr_msg_length > 0) {
        ompi_convertor_t convertor;
        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;

        /* create and initialize the convertor */
        OBJ_CONSTRUCT(&convertor, ompi_convertor_t);
        ompi_convertor_copy_and_prepare_for_recv(proc->proc_convertor,
                                                 datatype,
                                                 header->hdr_target_count,
                                                 target,
                                                 0,
                                                 &convertor);
        iov.iov_len = header->hdr_msg_length;
        iov.iov_base = (IOVBASE_TYPE*)inbuf;
        max_data = iov.iov_len;
        ompi_convertor_unpack(&convertor, &iov, &iov_count, &max_data);
        OBJ_DESTRUCT(&convertor);
        OBJ_RELEASE(datatype);
        OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), -1);
    } else {
        ompi_osc_pt2pt_longreq_t *longreq;
        ompi_osc_pt2pt_longreq_alloc(&longreq);

        longreq->req_comp_cb = ompi_osc_pt2pt_sendreq_recv_put_long_cb;
        longreq->req_comp_cbdata = NULL;
        longreq->req_datatype = datatype;
        longreq->req_module = module;

        ret = mca_pml.pml_irecv(target,
                                header->hdr_target_count,
                                datatype,
                                header->hdr_origin,
                                header->hdr_origin_tag,
                                module->p2p_comm,
                                &(longreq->req_pml_req));

        /* put the send request in the waiting list */
        OPAL_THREAD_LOCK(&(module->p2p_lock));
        opal_list_append(&(module->p2p_long_msgs), &(longreq->super.super));
        OPAL_THREAD_UNLOCK(&(module->p2p_lock));
    }

    return ret;
}
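/*
 * Both one-sided receive paths in this section, ompi_osc_pt2pt_replyreq_recv()
 * and ompi_osc_pt2pt_sendreq_recv_put(), make the same eager/long decision:
 * a payload carried inline with the header is unpacked immediately, while an
 * empty header means the data travels separately and a long-message receive
 * must be posted. A toy, self-contained rendering of just that control flow;
 * every name in it is hypothetical.
 */
#include <stdio.h>
#include <string.h>

/* hypothetical miniature of the OSC receive dispatch: inline ("eager")
 * payloads complete immediately; an empty payload stands for a long
 * message whose data will arrive through a separately posted receive */
typedef struct { size_t inline_len; const char *payload; } toy_hdr_t;

static int toy_recv_dispatch(const toy_hdr_t *hdr, char *dst, size_t dst_len)
{
    if (hdr->inline_len > 0) {
        /* eager path: the data arrived with the header; copy it out now */
        size_t n = hdr->inline_len < dst_len ? hdr->inline_len : dst_len;
        memcpy(dst, hdr->payload, n);
        return 0;  /* request complete */
    }
    /* long path: in the real code an irecv is posted here and the request
     * is appended to a pending list until the PML completes it */
    return 1;      /* request still pending */
}

int main(void)
{
    char buf[16] = {0};
    toy_hdr_t eager = { 5, "hello" };
    toy_hdr_t lng   = { 0, NULL };
    printf("eager: %d (buf=%s)\n", toy_recv_dispatch(&eager, buf, sizeof buf), buf);
    printf("long : %d\n", toy_recv_dispatch(&lng, buf, sizeof buf));
    return 0;
}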