mca_btl_base_descriptor_t* mca_btl_mx_prepare_dst( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, struct mca_mpool_base_registration_t* registration, struct ompi_convertor_t* convertor, uint8_t order, size_t reserve, size_t* size, uint32_t flags) { mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*)btl; mca_btl_mx_frag_t* frag; mx_return_t mx_return; mx_segment_t mx_segment; int rc; MCA_BTL_MX_FRAG_ALLOC_USER(btl, frag, rc); if( OPAL_UNLIKELY(NULL == frag) ) { return NULL; } frag->segment[0].seg_len = *size; ompi_convertor_get_current_pointer( convertor, (void**)&(frag->segment[0].seg_addr.pval) ); frag->segment[0].seg_key.key64 = (uint64_t)(intptr_t)frag; mx_segment.segment_ptr = frag->segment[0].seg_addr.pval; mx_segment.segment_length = frag->segment[0].seg_len; mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1, frag->segment[0].seg_key.key64, BTL_MX_PUT_MASK, NULL, &(frag->mx_request) ); if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) { opal_output( 0, "Fail to re-register a fragment with the MX NIC ...\n" ); MCA_BTL_MX_FRAG_RETURN( btl, frag ); return NULL; } #ifdef HAVE_MX_FORGET { mx_return = mx_forget( mx_btl->mx_endpoint, &(frag->mx_request) ); if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) { opal_output( 0, "mx_forget failed in mca_btl_mx_prepare_dst with error %d (%s)\n", mx_return, mx_strerror(mx_return) ); return NULL; } } #endif /* Allow the fragment to be recycled using the mca_btl_mx_free function */ frag->type = MCA_BTL_MX_SEND; frag->base.des_dst = frag->segment; frag->base.des_dst_cnt = 1; frag->base.des_flags = flags; frag->base.order = MCA_BTL_NO_ORDER; return &frag->base; }
/**
 * Run the sending side of one streaming test.
 *
 * Sequence, as implemented below:
 *   1. send a metadata record (len, iter, verify, bothways) to the peer;
 *   2. bump the shared running-thread counter and spin until every
 *      expected thread has done the same;
 *   3. when DO_HANDSHAKE is set, wait for the peer's "ready" message;
 *   4. stream `iter` messages of `len` bytes, keeping at most NUM_SREQ
 *      sends in flight;
 *   5. receive the peer's metadata record carrying the elapsed time and
 *      print the resulting figures.
 *
 * Any failure reported by the wait helper terminates the process.
 *
 * @param ep         Endpoint used for every send and receive.
 * @param dest       Address of the peer.
 * @param iter       Number of messages to stream.
 * @param len        Size in bytes of each streamed message.
 * @param blocking   Forwarded unchanged to mx_test_or_wait.
 * @param bothways   Non-zero to also start a receiving thread (only
 *                   available when MX_THREAD_SAFE is set).
 * @param match_val  Match value used for all messages; also selects the
 *                   reporting behaviour (MATCH_VAL_THREAD prints nothing).
 */
static inline void
sender(mx_endpoint_t ep, mx_endpoint_addr_t dest, int iter, int len,
       int blocking, int bothways, uint32_t match_val)
{
  int count, cur_req, num_req;
  mx_status_t stat;
  mx_request_t req[NUM_SREQ];
  mx_segment_t seg;
  uint32_t result;
  struct metadata info;
  char *buffer;
  uint32_t usec;
  double bw, pkts_per_sec;
#if MX_THREAD_SAFE
  MX_THREAD_T thread;
  struct mx_thread_arg args;
#endif
  struct bwinfo *bwinfo = NULL;

  /* One payload slot per in-flight request; computed in size_t so a large
   * len cannot overflow int arithmetic. */
  buffer = malloc((size_t)len * NUM_SREQ);
  if (buffer == NULL) {
    fprintf(stderr, "Can't allocate buffers\n");
    exit(1);
  }

  /* Describe the test to the peer. */
  seg.segment_ptr = &info;
  seg.segment_length = sizeof(info);
  info.len = htonl(len);
  info.iter = htonl(iter);
  info.verify = htonl(Verify);
  info.bothways = htonl(bothways);

  if (bothways) {
#if MX_THREAD_SAFE
    args.ep = ep;
    args.dest = dest;
    args.iter = iter;
    args.len = len;
    args.blocking = blocking;
    MX_THREAD_CREATE(&thread, &start_recv_thread, &args);
#else
    fprintf(stderr, "bothways not supported\n");
    exit(1);
#endif
  }

  mx_isend(ep, &seg, 1, dest, match_val, NULL, &req[0]);
  /* wait for the send to complete */
  mx_test_or_wait(blocking, ep, &req[0], MX_INFINITE, &stat, &result);
  if (!result) {
    fprintf(stderr, "mx_wait failed\n");
    exit(1);
  }
  if (stat.code != MX_STATUS_SUCCESS) {
    fprintf(stderr, "isend failed with status %s\n", mx_strstatus(stat.code));
    exit(1);
  }

  /* Rendezvous with the other threads of this process. */
  MX_MUTEX_LOCK(&stream_mutex);
  ++threads_running;
  MX_MUTEX_UNLOCK(&stream_mutex);
  /* NOTE(review): the counters are read here without holding the mutex;
   * confirm how they are declared. */
  while(threads_running != num_threads)
    /* spin */;

#if DO_HANDSHAKE
  /* wait for the receiver to get ready */
  seg.segment_ptr = &info;
  seg.segment_length = sizeof(info);
  mx_irecv(ep, &seg, 1, match_val, MX_MATCH_MASK_NONE, 0, &req[0]);
  /* wait for the receive to complete */
  mx_test_or_wait(blocking, ep, &req[0], MX_INFINITE, &stat, &result);
  if (!result) {
    fprintf(stderr, "mx_wait failed\n");
    exit(1);
  }
  if (stat.code != MX_STATUS_SUCCESS) {
    fprintf(stderr, "irecv failed with status %s\n", mx_strstatus(stat.code));
    exit(1);
  }
#endif

  /* Prime the pipeline with up to NUM_SREQ sends. */
  num_req = NUM_SREQ;
  if (num_req > iter)
    num_req = iter;
  for (cur_req = 0; cur_req < num_req; cur_req++) {
    seg.segment_ptr = &buffer[(size_t)cur_req * len];
    seg.segment_length = len;
    if (Verify)
      mx_fill_buffer(seg.segment_ptr, len);
    mx_isend(ep, &seg, 1, dest, match_val, NULL, &req[cur_req]);
  }

  for (count = 0; count < iter; count++) {
    /* wait for the send to complete */
    cur_req = count & (NUM_SREQ - 1);
    mx_test_or_wait(blocking, ep, &req[cur_req], MX_INFINITE, &stat, &result);
    if (!result) {
      fprintf(stderr, "mx_wait failed\n");
      exit(1);
    }
    if (stat.code != MX_STATUS_SUCCESS) {
      fprintf(stderr, "isend failed with status %s\n", mx_strstatus(stat.code));
      exit(1);
    }

    /* hack since mx_cancel does not work: never post more than `iter`
     * sends in total, so nothing is left to cancel afterwards */
    if ((count + NUM_SREQ) >= iter)
      continue;

    seg.segment_ptr = &buffer[(size_t)cur_req * len];
    seg.segment_length = len;
    if (Verify)
      mx_fill_buffer(seg.segment_ptr, len);
    mx_isend(ep, &seg, 1, dest, match_val, NULL, &req[cur_req]);
  }

  /* Every posted send has been waited for above, so the payload buffer is
   * no longer referenced; release it before any of the return paths. */
  free(buffer);
  buffer = NULL;

  /* Collect the elapsed time measured by the peer. */
  seg.segment_ptr = &info;
  seg.segment_length = sizeof(info);
  mx_irecv(ep, &seg, 1, match_val, MX_MATCH_MASK_NONE, 0, &req[0]);
  /* wait for the receive to complete */
  mx_test_or_wait(blocking, ep, &req[0], MX_INFINITE, &stat, &result);
  if (!result) {
    fprintf(stderr, "mx_wait failed\n");
    exit(1);
  }
  if (stat.code != MX_STATUS_SUCCESS) {
    fprintf(stderr, "irecv failed with status %s\n", mx_strstatus(stat.code));
    exit(1);
  }

  usec = ntohl(info.usec);
  bw = ((double)iter * (double)len) / (double) usec;
  pkts_per_sec = iter / ((double) usec / 1000000.0);

  /* Helper threads do not report. */
  if (match_val == MATCH_VAL_THREAD)
    return;

#if MX_THREAD_SAFE
  /* Braced explicitly: the condition used to be an unbraced `if` placed in
   * front of the #if, so the statement it governed depended on the build
   * configuration. */
  if (match_val == MATCH_VAL_MAIN && bothways) {
    printf("Send: %8d %5.3f %5.3f\n", len, bw, pkts_per_sec);
    MX_THREAD_JOIN(thread);
    bwinfo = &global_bwinfo;
  }
#endif

  if (bwinfo) {
    printf("Recv: %8d %5.3f %5.3f\n", len, bwinfo->bandwidth, bwinfo->pkts_per_sec);
    bw += bwinfo->bandwidth;
    pkts_per_sec += bwinfo->pkts_per_sec;
  }
  printf("Total: %8d %5.3f %5.3f\n", len, bw, pkts_per_sec);
}
int mca_btl_mx_register( struct mca_btl_base_module_t* btl, mca_btl_base_tag_t tag, mca_btl_base_module_recv_cb_fn_t cbfunc, void* cbdata ) { mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*) btl; #if 0 if( (NULL != cbfunc) && ( 0 == mca_btl_mx_component.mx_use_unexpected) ) { #endif if( NULL != cbfunc ) { mca_btl_mx_frag_t* frag; mx_return_t mx_return; mx_segment_t mx_segment; int i, rc; /* Post the receives if there is no unexpected handler */ for( i = 0; i < mca_btl_mx_component.mx_max_posted_recv; i++ ) { MCA_BTL_MX_FRAG_ALLOC_EAGER( mx_btl, frag, rc ); if( NULL == frag ) { opal_output( 0, "mca_btl_mx_register: unable to allocate more eager fragments\n" ); if( 0 == i ) { return OMPI_ERROR; } break; /* some fragments are already registered. Try to continue... */ } frag->base.des_dst = frag->segment; frag->base.des_dst_cnt = 1; frag->base.des_src = NULL; frag->base.des_src_cnt = 0; frag->mx_frag_list = NULL; frag->type = MCA_BTL_MX_RECV; mx_segment.segment_ptr = (void*)(frag+1); mx_segment.segment_length = mx_btl->super.btl_eager_limit; mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1, 0x01ULL, BTL_MX_RECV_MASK, frag, &(frag->mx_request) ); if( MX_SUCCESS != mx_return ) { opal_output( 0, "mca_btl_mx_register: mx_irecv failed with status %d (%s)\n", mx_return, mx_strerror(mx_return) ); MCA_BTL_MX_FRAG_RETURN( mx_btl, frag ); return OMPI_ERROR; } } } return OMPI_SUCCESS; } /** * Allocate a segment. * * @param btl (IN) BTL module * @param size (IN) Request segment size. */ mca_btl_base_descriptor_t* mca_btl_mx_alloc( struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* endpoint, uint8_t order, size_t size, uint32_t flags) { mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*) btl; mca_btl_mx_frag_t* frag; int rc; MCA_BTL_MX_FRAG_ALLOC_EAGER(mx_btl, frag, rc); if( OPAL_UNLIKELY(NULL == frag) ) { return NULL; } frag->segment[0].seg_len = size <= mx_btl->super.btl_eager_limit ? 
size : mx_btl->super.btl_eager_limit ; frag->segment[0].seg_addr.pval = (void*)(frag+1); frag->base.des_src = frag->segment; frag->base.des_src_cnt = 1; frag->base.des_flags = flags; frag->base.order = MCA_BTL_NO_ORDER; return (mca_btl_base_descriptor_t*)frag; }
/**
 * Run the receiving side of one streaming test.
 *
 * Sequence, as implemented below:
 *   1. receive the metadata record (len, iter, verify, bothways) from the
 *      peer and, unless `filter` is ~0, reconnect to the peer using
 *      `filter`;
 *   2. pre-post up to NUM_RREQ receives, rendezvous with the other threads
 *      of this process and, when DO_HANDSHAKE is set, tell the peer we are
 *      ready;
 *   3. time the reception of `iter` messages of `len` bytes, keeping at
 *      most NUM_RREQ receives posted;
 *   4. store the figures in global_bwinfo and send the elapsed time back to
 *      the peer.
 *
 * Any failure reported by the wait helper during steps 1-3 terminates the
 * process.  The global `Verify` flag is overwritten with the value sent by
 * the peer.
 *
 * @param ep         Endpoint used for every send and receive.
 * @param blocking   Forwarded unchanged to mx_test_or_wait.
 * @param match_val  Match value used for all messages.
 * @param filter     Passed to mx_connect; ~0 skips the reconnect.
 */
static inline void
receiver(mx_endpoint_t ep, int blocking, uint32_t match_val, uint32_t filter)
{
  int count, len, iter, cur_req, num_req;
  mx_status_t stat;
  mx_request_t req[NUM_RREQ];
  mx_request_t sreq;
  mx_segment_t seg;
  uint32_t result, usec;
  struct timeval start_time, end_time;
  double bw, pkts_per_sec;
  char *buffer;
  struct metadata info;
  int bothways;
#if MX_THREAD_SAFE
  struct mx_thread_arg args;
  MX_THREAD_T thread;
#endif
  uint64_t nic;
  uint32_t eid;

  /* Learn the test parameters from the peer. */
  seg.segment_ptr = &info;
  seg.segment_length = sizeof(info);
  mx_irecv(ep, &seg, 1, match_val, MX_MATCH_MASK_NONE, 0, &req[0]);
  /* wait for the receive to complete */
  mx_test_or_wait(blocking, ep, &req[0], MX_INFINITE, &stat, &result);
  if (!result) {
    fprintf(stderr, "mx_wait failed\n");
    exit(1);
  }
  if (stat.code != MX_STATUS_SUCCESS) {
    fprintf(stderr, "irecv failed with status %s\n", mx_strstatus(stat.code));
    exit(1);
  }
  if (filter != ~0) {
    /* filter == ~0 means recv threads on master */
    mx_decompose_endpoint_addr(stat.source, &nic, &eid);
    mx_connect(ep, nic, eid, filter, MX_INFINITE, &stat.source);
  }

  len = ntohl(info.len);
  iter = ntohl(info.iter);
  Verify = ntohl(info.verify);
  bothways = ntohl(info.bothways);
  if (do_verbose)
    printf("Starting test: len = %d, iter = %d\n", len, iter);
  if (do_verbose && Verify) {
    printf("Verifying results\n");
  }

  /* One payload slot per posted receive; computed in size_t so a large
   * len cannot overflow int arithmetic. */
  buffer = malloc((size_t)len * NUM_RREQ);
  if (buffer == NULL) {
    fprintf(stderr, "Can't allocate buffers\n");
    exit(1);
  }

  if (bothways) {
#if MX_THREAD_SAFE
    args.ep = ep;
    args.dest = stat.source;
    args.iter = iter;
    args.len = len;
    args.blocking = blocking;
    num_threads++;
    MX_THREAD_CREATE(&thread, &start_send_thread, &args);
#else
    fprintf(stderr,"bothways not supported\n");
    exit(1);
#endif
  }

  /* pre-post our receives */
  num_req = NUM_RREQ;
  if (num_req > iter)
    num_req = iter;
  for (cur_req = 0; cur_req < num_req; cur_req++) {
    seg.segment_ptr = &buffer[(size_t)cur_req * len];
    seg.segment_length = len;
    mx_irecv(ep, &seg, 1, match_val, MX_MATCH_MASK_NONE, 0, &req[cur_req]);
  }

  /* Rendezvous with the other threads of this process. */
  MX_MUTEX_LOCK(&stream_mutex);
  ++threads_running;
  MX_MUTEX_UNLOCK(&stream_mutex);
  /* NOTE(review): the counters are read here without holding the mutex;
   * confirm how they are declared. */
  while(threads_running != num_threads)
    /* spin */;

#if DO_HANDSHAKE
  /* post a send to let the sender know we are ready */
  seg.segment_ptr = &info;
  seg.segment_length = sizeof(info);
  sreq = 0;
  mx_isend(ep, &seg, 1, stat.source, match_val, NULL, &sreq);
  mx_test_or_wait(blocking, ep, &sreq, MX_INFINITE, &stat, &result);
  if (!result) {
    fprintf(stderr, "mx_wait failed\n");
    exit(1);
  }
  if (stat.code != MX_STATUS_SUCCESS) {
    fprintf(stderr, "isend failed with status %s\n", mx_strstatus(stat.code));
    exit(1);
  }
#endif

  /* start the test */
  gettimeofday(&start_time, NULL);
  for (count = 0; count < iter; count++) {
    /* wait for the receive to complete */
    cur_req = count & (NUM_RREQ - 1);
    mx_test_or_wait(blocking, ep, &req[cur_req], MX_INFINITE, &stat, &result);
    if (!result) {
      fprintf(stderr, "mx_wait failed\n");
      exit(1);
    }
    if (stat.code != MX_STATUS_SUCCESS) {
      fprintf(stderr, "irecv failed with status %s\n", mx_strstatus(stat.code));
      exit(1);
    }
    if (stat.xfer_length != len) {
      fprintf(stderr, "bad len %d != %d\n", stat.xfer_length, len);
      exit(1);
    }

    seg.segment_ptr = &buffer[(size_t)cur_req * len];
    seg.segment_length = len;

    /* Check every message that arrived, including the last NUM_RREQ ones
     * whose slots are not re-posted. */
    if (Verify)
      mx_check_buffer(seg.segment_ptr, len);

    /* hack since mx_cancel does not work: never post more than `iter`
     * receives in total.  The test must be `>=` (as on the sending side);
     * with `>` one extra receive stayed posted on `buffer` after the loop
     * and was still pending when the buffer was freed. */
    if ((count + NUM_RREQ) >= iter)
      continue;

    mx_irecv(ep, &seg, 1, match_val, MX_MATCH_MASK_NONE, 0, &req[cur_req]);
  }
  gettimeofday(&end_time, NULL);

  usec = end_time.tv_usec - start_time.tv_usec;
  usec += (end_time.tv_sec - start_time.tv_sec) * 1000000;
  bw = ((double)iter * (double)len) / (double) usec;
  pkts_per_sec = iter / ((double) usec / 1000000.0);
  global_bwinfo.bandwidth = bw;
  global_bwinfo.pkts_per_sec = pkts_per_sec;

  /* Report the elapsed time to the sender. */
  info.usec = htonl(usec);
  seg.segment_ptr = &info;
  seg.segment_length = sizeof(info);
  sreq = 0;
  mx_isend(ep, &seg, 1, stat.source, match_val, NULL, &sreq);
  mx_test_or_wait(blocking, ep, &sreq, MX_INFINITE, &stat, &result);

  /* Every posted receive has completed, so no request references the
   * payload buffer any more. */
  free(buffer);

#if MX_THREAD_SAFE
  if(bothways)
    MX_THREAD_JOIN(thread);
#endif
}