/**
 * Release an MX BTL module.
 *
 * Closes the module's MX endpoint when one is set, destructs the
 * module's lock and peer objects, and frees the module itself.
 *
 * @param btl  Module to tear down; it is treated as a mca_btl_mx_module_t
 *             and must not be used after this call because it is freed.
 * @return OMPI_SUCCESS in every case.
 */
int mca_btl_mx_finalize( struct mca_btl_base_module_t* btl )
{
    mca_btl_mx_module_t* module = (mca_btl_mx_module_t*) btl;

    /* The endpoint handle may be NULL, in which case there is nothing to close. */
    if( NULL != module->mx_endpoint ) {
        mx_close_endpoint( module->mx_endpoint );
    }

    OBJ_DESTRUCT( &module->mx_lock );
    OBJ_DESTRUCT( &module->mx_peers );

    free( module );

    return OMPI_SUCCESS;
}
/**
 * Shut down the MX MTL.
 *
 * Unregisters the MTL progress callback, closes the MTL's MX endpoint and,
 * if that succeeded, hands off to ompi_common_mx_finalize().
 *
 * @param mtl  Unused by this function.
 * @return The result of ompi_common_mx_finalize() when the endpoint was
 *         closed successfully, OMPI_ERROR otherwise.
 */
int ompi_mtl_mx_finalize(struct mca_mtl_base_module_t* mtl)
{
    mx_return_t rc;

    opal_progress_unregister(ompi_mtl_mx_progress);

    /* free resources */
    rc = mx_close_endpoint(ompi_mtl_mx.mx_endpoint);
    if( MX_SUCCESS == rc ) {
        return ompi_common_mx_finalize();
    }

    /* Closing failed: report it and skip the common finalize step. */
    opal_output(ompi_mtl_base_framework.framework_output,
                "Error in mx_close_endpoint (error %s)\n",
                mx_strerror(rc));
    return OMPI_ERROR;
}
int mca_btl_mx_ft_event(int state) { mca_btl_mx_module_t* mx_btl; int i; if(OPAL_CRS_CHECKPOINT == state) { /* Continue must reconstruct the routes (including modex), since we * have to tear down the devices completely. * We have to do this because the MX driver can be checkpointed, but * cannot be restarted with BLCR due to an mmap problem. If we do not * close MX then BLCR throws the following error in /var/log/messages: * kernel: do_mmap(<file>, 00002aaab0aac000, 0000000000400000, ...) failed: ffffffffffffffff * kernel: vmadump: mmap failed: /dev/mx0 * kernel: blcr: thaw_threads returned error, aborting. -1 * JJH: It may be possible to, instead of restarting the entire driver, just reconnect endpoints */ ompi_cr_continue_like_restart = true; for( i = 0; i < mca_btl_mx_component.mx_num_btls; i++ ) { mx_btl = mca_btl_mx_component.mx_btls[i]; if( NULL != mx_btl->mx_endpoint ) { mx_close_endpoint(mx_btl->mx_endpoint); mx_btl->mx_endpoint = NULL; } } } else if(OPAL_CRS_CONTINUE == state) { ; } else if(OPAL_CRS_RESTART == state) { ; } else if(OPAL_CRS_TERM == state ) { ; } else { ; } return OMPI_SUCCESS; }
int main(int argc, char **argv) { mx_endpoint_t ep; uint64_t nic_id; uint16_t my_eid; uint64_t his_nic_id; uint32_t board_id; uint32_t filter; uint16_t his_eid; mx_endpoint_addr_t his_addr; char *rem_host; int len; int iter; int c; int do_wait; int do_bothways; extern char *optarg; mx_return_t ret; #if DEBUG extern int mx_debug_mask; mx_debug_mask = 0xFFF; #endif mx_init(); MX_MUTEX_INIT(&stream_mutex); /* set up defaults */ rem_host = NULL; filter = FILTER; my_eid = DFLT_EID; his_eid = DFLT_EID; board_id = MX_ANY_NIC; len = DFLT_LEN; iter = DFLT_ITER; do_wait = 0; do_bothways = 0; num_threads = 1; while ((c = getopt(argc, argv, "hd:e:f:n:b:r:l:N:Vvwx")) != EOF) switch(c) { case 'd': rem_host = optarg; break; case 'e': my_eid = atoi(optarg); break; case 'f': filter = atoi(optarg); break; case 'n': sscanf(optarg, "%"SCNx64, &nic_id); mx_nic_id_to_board_number(nic_id, &board_id); break; case 'b': board_id = atoi(optarg); break; case 'r': his_eid = atoi(optarg); break; case 'l': len = atoi(optarg); if (len > MAX_LEN) { fprintf(stderr, "len too large, max is %d\n", MAX_LEN); exit(1); } break; case 'N': iter = atoi(optarg); break; case 'V': Verify = 1; break; case 'v': do_verbose = 1; break; case 'w': do_wait = 1; break; case 'x': #if MX_THREAD_SAFE do_bothways = 1; #else fprintf(stderr, "bi-directional mode only supported with threadsafe mx lib\n"); exit(1); #endif break; case 'h': default: usage(); exit(1); } if (rem_host != NULL) num_threads += do_bothways; ret = mx_open_endpoint(board_id, my_eid, filter, NULL, 0, &ep); if (ret != MX_SUCCESS) { fprintf(stderr, "Failed to open endpoint %s\n", mx_strerror(ret)); exit(1); } /* If no host, we are receiver */ if (rem_host == NULL) { if (do_verbose) printf("Starting streaming receiver\n"); if (Verify) { fprintf(stderr, "-V ignored. 
Verify must be set by sender\n"); Verify = 0; } if (do_wait) receiver_blocking(ep, MATCH_VAL_MAIN, filter); else receiver_polling(ep, MATCH_VAL_MAIN, filter); } else { /* get address of destination */ mx_hostname_to_nic_id(rem_host, &his_nic_id); mx_connect(ep, his_nic_id, his_eid, filter, MX_INFINITE, &his_addr); if (do_verbose) printf("Starting streaming send to host %s\n", rem_host); if (Verify) printf("Verifying results\n"); /* start up the sender */ if (do_wait) sender_blocking(ep, his_addr, iter, len, do_bothways,MATCH_VAL_MAIN); else sender_polling(ep, his_addr, iter, len, do_bothways, MATCH_VAL_MAIN); } mx_close_endpoint(ep); mx_finalize(); exit(0); }