/*
 * Establish an MX connection to the peer described by module_endpoint.
 *
 * mx_connect() is retried on MX_TIMEOUT, up to
 * mca_btl_mx_component.mx_connection_retries attempts.  On success the
 * endpoint is marked MCA_BTL_MX_CONNECTED and its peer address stored;
 * on any other failure an error is logged, the endpoint is marked
 * MCA_BTL_MX_NOT_REACHEABLE and OMPI_ERROR is returned.
 *
 * @param module_endpoint  endpoint to connect (status field is updated)
 * @return OMPI_SUCCESS or OMPI_ERROR
 */
int mca_btl_mx_proc_connect( mca_btl_mx_endpoint_t* module_endpoint )
{
    int num_retry = 0;
    mx_return_t mx_status;
    mx_endpoint_addr_t mx_remote_addr;

    module_endpoint->status = MCA_BTL_MX_CONNECTION_PENDING;

 retry_connect:
    mx_status = mx_connect( module_endpoint->endpoint_btl->mx_endpoint,
                            module_endpoint->mx_peer->nic_id,
                            module_endpoint->mx_peer->endpoint_id,
                            mca_btl_mx_component.mx_filter,
                            mca_btl_mx_component.mx_timeout,
                            &mx_remote_addr );
    if( MX_SUCCESS != mx_status ) {
        /* Retry only on timeout and only while retries remain.  The
         * num_retry++ is short-circuited so it only counts timeouts,
         * exactly as the original nested-if logic did. */
        if( (MX_TIMEOUT == mx_status) &&
            (num_retry++ < mca_btl_mx_component.mx_connection_retries) ) {
            goto retry_connect;
        }
        {
            char peer_name[MX_MAX_HOSTNAME_LEN];

            if( MX_SUCCESS != mx_nic_id_to_hostname( module_endpoint->mx_peer->nic_id,
                                                     peer_name ) ) {
                /* snprintf: never overrun the fixed-size hostname buffer */
                snprintf( peer_name, sizeof(peer_name), "unknown %lx nic_id",
                          (long)module_endpoint->mx_peer->nic_id );
            }
            opal_output( 0, "mx_connect fail for %s with key %x (error %s)\n\tUnique ID (local %x remote %x)\n",
                         peer_name, mca_btl_mx_component.mx_filter,
                         mx_strerror(mx_status),
                         module_endpoint->endpoint_btl->mx_unique_network_id,
                         module_endpoint->mx_peer->unique_network_id );
        }
        module_endpoint->status = MCA_BTL_MX_NOT_REACHEABLE;
        return OMPI_ERROR;
    }

    module_endpoint->mx_peer_addr = mx_remote_addr;
    module_endpoint->status = MCA_BTL_MX_CONNECTED;

    return OMPI_SUCCESS;
}
/*
 * Initialize the MX-netmod private state of a virtual connection.
 *
 * In the non-ONDEMAND build this fetches the peer's business card,
 * extracts the remote endpoint/NIC ids, connects the local MX endpoint
 * to the peer, and marks the VC ACTIVE.  Finally it configures the
 * eager threshold and the CH3 send-function table for this VC.
 *
 * @param vc  virtual connection to initialize
 * @return MPI_SUCCESS or an MPI error code
 */
int MPID_nem_mx_vc_init (MPIDI_VC_t *vc)
{
    uint32_t threshold;
    MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
    int mpi_errno = MPI_SUCCESS;

    /* first make sure that our private fields in the vc fit into the area provided */
    MPIU_Assert(sizeof(MPID_nem_mx_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);

#ifdef ONDEMAND
    VC_FIELD(vc, local_connected)  = 0;
    VC_FIELD(vc, remote_connected) = 0;
#else
    {
        char *business_card = NULL;
        int val_max_sz;
        int ret;
#ifdef USE_PMI2_API
        val_max_sz = PMI2_MAX_VALLEN;
#else
        /* fix: the PMI return value was previously ignored, leaving
         * val_max_sz uninitialized on failure */
        mpi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
#endif
        business_card = (char *)MPIU_Malloc(val_max_sz);
        /* fix: check allocation before writing into the buffer */
        MPIU_ERR_CHKANDJUMP(business_card == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem");

        mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card, val_max_sz, vc->pg);
        if (mpi_errno) {
            MPIU_Free(business_card);   /* fix: was leaked on this error path */
            MPIU_ERR_POP(mpi_errno);
        }

        mpi_errno = MPID_nem_mx_get_from_bc(business_card,
                                            &VC_FIELD(vc, remote_endpoint_id),
                                            &VC_FIELD(vc, remote_nic_id));
        MPIU_Free(business_card);       /* fix: free before the error jump, not after */
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        ret = mx_connect(MPID_nem_mx_local_endpoint,
                         VC_FIELD(vc, remote_nic_id),
                         VC_FIELD(vc, remote_endpoint_id),
                         MPID_NEM_MX_FILTER, MX_INFINITE,
                         &(VC_FIELD(vc, remote_endpoint_addr)));
        MPIU_ERR_CHKANDJUMP1(ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                             "**mx_connect", "**mx_connect %s", mx_strerror(ret));

        /* attach the VC to the remote address so completions can find it */
        mx_set_endpoint_addr_context(VC_FIELD(vc, remote_endpoint_addr), (void *)vc);
        MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
    }
#endif

    /* messages up to MX's copy-send threshold are sent eagerly */
    mx_get_info(MPID_nem_mx_local_endpoint, MX_COPY_SEND_MAX, NULL, 0,
                &threshold, sizeof(uint32_t));
    vc->eager_max_msg_sz = threshold;
    vc->rndvSend_fn      = NULL;
    vc->sendNoncontig_fn = MPID_nem_mx_SendNoncontig;
    vc->comm_ops         = &comm_ops;
    vc_ch->iStartContigMsg = MPID_nem_mx_iStartContigMsg;
    vc_ch->iSendContig     = MPID_nem_mx_iSendContig;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
/*
 * Create an MTL endpoint for a remote proc.
 *
 * Receives the peer's MX address via the modex, then connects to it,
 * retrying on MX_TIMEOUT up to ompi_mtl_mx.mx_retries times.
 *
 * @param ompi_proc  remote process descriptor
 * @return newly allocated endpoint on success, NULL on failure
 *         (ownership of the returned object passes to the caller)
 */
mca_mtl_mx_endpoint_t* mca_mtl_mx_endpoint_create(ompi_proc_t* ompi_proc)
{
    mca_mtl_mx_endpoint_t* mtl_mx_endpoint = NULL;
    int rc;
    mca_mtl_mx_addr_t *mx_peer;
    size_t size;
    mx_return_t mx_return;
    int num_retry = 0;

    /* get the remote proc's address (only one) */
    rc = ompi_modex_recv(&mca_mtl_mx_component.super.mtl_version,
                         ompi_proc, (void**)&mx_peer, &size);
    if( rc != OMPI_SUCCESS || size != sizeof(mca_mtl_mx_addr_t)) {
        return NULL;
    }

    mtl_mx_endpoint = (mca_mtl_mx_endpoint_t*) OBJ_NEW(mca_mtl_mx_endpoint_t);
    mtl_mx_endpoint->mx_peer = mx_peer;

 retry_connect:
    mx_return = mx_connect(ompi_mtl_mx.mx_endpoint,
                           mx_peer->nic_id,
                           mx_peer->endpoint_id,
                           ompi_mtl_mx.mx_filter,
                           ompi_mtl_mx.mx_timeout,
                           &mtl_mx_endpoint->mx_peer_addr);
    if(MX_SUCCESS != mx_return) {
        char peer_name[MX_MAX_HOSTNAME_LEN];
        if(MX_TIMEOUT == mx_return) {
            if( num_retry++ < ompi_mtl_mx.mx_retries ) {
                goto retry_connect;
            }
        }
        if(MX_SUCCESS != mx_nic_id_to_hostname( mx_peer->nic_id, peer_name)) {
            /* snprintf: never overrun the fixed-size hostname buffer */
            snprintf( peer_name, sizeof(peer_name), "unknown %lx nic_id",
                      (long)mx_peer->nic_id );
        }
        opal_output(ompi_mtl_base_output,
                    "mx_connect fail for %s with key %x (error %s)\n",
                    peer_name, ompi_mtl_mx.mx_filter, mx_strerror(mx_return) );
        /* fix: release the endpoint object instead of leaking it on failure */
        OBJ_RELEASE(mtl_mx_endpoint);
        return NULL;
    }

    return mtl_mx_endpoint;
}
int main(int argc, char **argv) { mx_endpoint_t ep; uint64_t nic_id; uint16_t my_eid; uint64_t his_nic_id; uint32_t board_id; uint32_t filter; uint16_t his_eid; mx_endpoint_addr_t his_addr; char *rem_host; int len; int iter; int c; int do_wait; int do_bothways; extern char *optarg; mx_return_t ret; #if DEBUG extern int mx_debug_mask; mx_debug_mask = 0xFFF; #endif mx_init(); MX_MUTEX_INIT(&stream_mutex); /* set up defaults */ rem_host = NULL; filter = FILTER; my_eid = DFLT_EID; his_eid = DFLT_EID; board_id = MX_ANY_NIC; len = DFLT_LEN; iter = DFLT_ITER; do_wait = 0; do_bothways = 0; num_threads = 1; while ((c = getopt(argc, argv, "hd:e:f:n:b:r:l:N:Vvwx")) != EOF) switch(c) { case 'd': rem_host = optarg; break; case 'e': my_eid = atoi(optarg); break; case 'f': filter = atoi(optarg); break; case 'n': sscanf(optarg, "%"SCNx64, &nic_id); mx_nic_id_to_board_number(nic_id, &board_id); break; case 'b': board_id = atoi(optarg); break; case 'r': his_eid = atoi(optarg); break; case 'l': len = atoi(optarg); if (len > MAX_LEN) { fprintf(stderr, "len too large, max is %d\n", MAX_LEN); exit(1); } break; case 'N': iter = atoi(optarg); break; case 'V': Verify = 1; break; case 'v': do_verbose = 1; break; case 'w': do_wait = 1; break; case 'x': #if MX_THREAD_SAFE do_bothways = 1; #else fprintf(stderr, "bi-directional mode only supported with threadsafe mx lib\n"); exit(1); #endif break; case 'h': default: usage(); exit(1); } if (rem_host != NULL) num_threads += do_bothways; ret = mx_open_endpoint(board_id, my_eid, filter, NULL, 0, &ep); if (ret != MX_SUCCESS) { fprintf(stderr, "Failed to open endpoint %s\n", mx_strerror(ret)); exit(1); } /* If no host, we are receiver */ if (rem_host == NULL) { if (do_verbose) printf("Starting streaming receiver\n"); if (Verify) { fprintf(stderr, "-V ignored. 
Verify must be set by sender\n"); Verify = 0; } if (do_wait) receiver_blocking(ep, MATCH_VAL_MAIN, filter); else receiver_polling(ep, MATCH_VAL_MAIN, filter); } else { /* get address of destination */ mx_hostname_to_nic_id(rem_host, &his_nic_id); mx_connect(ep, his_nic_id, his_eid, filter, MX_INFINITE, &his_addr); if (do_verbose) printf("Starting streaming send to host %s\n", rem_host); if (Verify) printf("Verifying results\n"); /* start up the sender */ if (do_wait) sender_blocking(ep, his_addr, iter, len, do_bothways,MATCH_VAL_MAIN); else sender_polling(ep, his_addr, iter, len, do_bothways, MATCH_VAL_MAIN); } mx_close_endpoint(ep); mx_finalize(); exit(0); }
/*
 * Streaming receiver: receives a metadata message describing the test
 * (length, iteration count, verify/bothways flags), pre-posts a ring of
 * NUM_RREQ receives, then drains `iter` messages while re-posting into
 * the ring, and finally sends the measured elapsed time back to the
 * sender.
 *
 * @param ep        open MX endpoint to receive on
 * @param blocking  nonzero -> mx_test_or_wait blocks; zero -> polls
 * @param match_val match value for all receives/sends in this stream
 * @param filter    MX filter; ~0 means "recv thread on master" (skip
 *                  the explicit connect back to the sender)
 */
static inline void receiver(mx_endpoint_t ep, int blocking, uint32_t match_val, uint32_t filter)
{
    int count, len, iter, cur_req, num_req;
    mx_status_t stat;
    mx_request_t req[NUM_RREQ];
    mx_request_t sreq;
    mx_segment_t seg;
    uint32_t result, usec;
    struct timeval start_time, end_time;
    double bw, pkts_per_sec;
    char *buffer;
    struct metadata info;
    int bothways;
#if MX_THREAD_SAFE
    struct mx_thread_arg args;
    MX_THREAD_T thread;
#endif
    uint64_t nic;
    uint32_t eid;

    /* receive the test parameters from the sender */
    seg.segment_ptr = &info;
    seg.segment_length = sizeof(info);
    mx_irecv(ep, &seg, 1, match_val, MX_MATCH_MASK_NONE, 0, &req[0]);
    /* wait for the receive to complete */
    mx_test_or_wait(blocking, ep, &req[0], MX_INFINITE, &stat, &result);
    if (!result) {
        fprintf(stderr, "mx_wait failed\n");
        exit(1);
    }
    if (stat.code != MX_STATUS_SUCCESS) {
        fprintf(stderr, "irecv failed with status %s\n", mx_strstatus(stat.code));
        exit(1);
    }
    if (filter != ~0) {
        /* filter == ~0 means recv threads on master */
        /* connect back to the sender; stat.source is overwritten with the
           connected address and reused as the send destination below */
        mx_decompose_endpoint_addr(stat.source, &nic, &eid);
        mx_connect(ep, nic, eid, filter, MX_INFINITE, &stat.source);
    }
    /* metadata fields arrive in network byte order */
    len = ntohl(info.len);
    iter = ntohl(info.iter);
    Verify = ntohl(info.verify);
    bothways = ntohl(info.bothways);

    if (do_verbose)
        printf("Starting test: len = %d, iter = %d\n", len, iter);
    if (do_verbose && Verify) {
        printf("Verifying results\n");
    }
    /* one buffer slot per outstanding receive in the ring */
    buffer = malloc(len * NUM_RREQ);
    if (buffer == NULL) {
        fprintf(stderr, "Can't allocate buffers\n");
        exit(1);
    }

    if (bothways) {
#if MX_THREAD_SAFE
        /* spawn a sender thread so traffic flows in both directions */
        args.ep = ep;
        args.dest = stat.source;
        args.iter = iter;
        args.len = len;
        args.blocking = blocking;
        num_threads++;
        MX_THREAD_CREATE(&thread, &start_send_thread, &args);
#else
        fprintf(stderr,"bothways not supported\n");
        exit(1);
#endif
    }

    /* pre-post our receives */
    num_req = NUM_RREQ;
    if (num_req > iter)
        num_req = iter;
    for (cur_req = 0; cur_req < num_req; cur_req++) {
        seg.segment_ptr = &buffer[cur_req * len];
        seg.segment_length = len;
        mx_irecv(ep, &seg, 1, match_val, MX_MATCH_MASK_NONE, 0, &req[cur_req]);
    }

    /* NOTE(review): busy-wait barrier until every thread has started */
    MX_MUTEX_LOCK(&stream_mutex);
    ++threads_running;
    MX_MUTEX_UNLOCK(&stream_mutex);
    while(threads_running != num_threads)
        /* spin */;

#if DO_HANDSHAKE
    /* post a send to let the sender know we are ready */
    seg.segment_ptr = &info;
    seg.segment_length = sizeof(info);
    sreq = 0;
    mx_isend(ep, &seg, 1, stat.source, match_val, NULL, &sreq);
    mx_test_or_wait(blocking, ep, &sreq, MX_INFINITE, &stat, &result);
    if (!result) {
        fprintf(stderr, "mx_wait failed\n");
        exit(1);
    }
    if (stat.code != MX_STATUS_SUCCESS) {
        fprintf(stderr, "isend failed with status %s\n", mx_strstatus(stat.code));
        exit(1);
    }
#endif

    /* start the test */
    gettimeofday(&start_time, NULL);
    for (count = 0; count < iter; count++) {
        /* wait for the receive to complete */
        /* NUM_RREQ is assumed to be a power of two for this mask trick */
        cur_req = count & (NUM_RREQ - 1);
        mx_test_or_wait(blocking, ep, &req[cur_req], MX_INFINITE, &stat, &result);
        if (!result) {
            fprintf(stderr, "mx_wait failed\n");
            exit(1);
        }
        if (stat.code != MX_STATUS_SUCCESS) {
            fprintf(stderr, "irecv failed with status %s\n", mx_strstatus(stat.code));
            exit(1);
        }
        if (stat.xfer_length != len) {
            fprintf(stderr, "bad len %d != %d\n", stat.xfer_length, len);
            exit(1);
        }
        /* hack since mx_cancel does not work */
        /* stop re-posting once the remaining iterations are covered by
           the receives already in flight */
        if ((count + NUM_RREQ) > iter)
            continue;
        seg.segment_ptr = &buffer[cur_req * len];
        seg.segment_length = len;
        if (Verify)
            mx_check_buffer(seg.segment_ptr, len);
        mx_irecv(ep, &seg, 1, match_val, MX_MATCH_MASK_NONE, 0, &req[cur_req]);
    }
    gettimeofday(&end_time, NULL);

    /* compute bandwidth and packet rate over the whole run */
    usec = end_time.tv_usec - start_time.tv_usec;
    usec += (end_time.tv_sec - start_time.tv_sec) * 1000000;
    bw = ((double)iter * (double)len) / (double) usec;
    pkts_per_sec = iter / ((double) usec / 1000000.0);
    global_bwinfo.bandwidth = bw;
    global_bwinfo.pkts_per_sec = pkts_per_sec;
    /* printf("%8d %5.3f %5.3f\n", len, bw, pkts_per_sec);*/

#if 0
    /* mx_cancel assert(0)'s */
    for (cur_req = 0; cur_req < num_req; cur_req++) {
        mx_cancel(ep, &req[cur_req]);
    }
#endif

    /* report the elapsed time back to the sender */
    info.usec = htonl(usec);
    seg.segment_ptr = &info;
    seg.segment_length = sizeof(info);
    sreq = 0;
    mx_isend(ep, &seg, 1, stat.source, match_val, NULL, &sreq);
    mx_test_or_wait(blocking, ep, &sreq, MX_INFINITE, &stat, &result);
    free(buffer);
#if MX_THREAD_SAFE
    if(bothways)
        MX_THREAD_JOIN(thread);
#endif
}