Example #1
0
/**
 * Tear down an MX BTL module.
 *
 * Closes the module's MX endpoint when one is present, destructs the
 * module's lock and peer list objects, and releases the module memory.
 *
 * @param btl  The BTL module to finalize; it is treated as an
 *             mca_btl_mx_module_t and is freed by this call.
 * @return     Always OMPI_SUCCESS.
 */
int mca_btl_mx_finalize( struct mca_btl_base_module_t* btl )
{
    mca_btl_mx_module_t* module = (mca_btl_mx_module_t*) btl;

    /* The endpoint may be NULL, so only close it when it is set. */
    if( NULL != module->mx_endpoint ) {
        mx_close_endpoint(module->mx_endpoint);
    }

    /* Release the embedded objects before the structure that holds them. */
    OBJ_DESTRUCT( &module->mx_lock );
    OBJ_DESTRUCT( &module->mx_peers );
    free(module);

    return OMPI_SUCCESS;
}
Example #2
0
/**
 * Shut down the MX MTL.
 *
 * Unregisters the MTL progress callback, closes the MTL's MX endpoint and
 * then hands off to the common MX finalization routine.
 *
 * @param mtl  The MTL module being finalized (not referenced here).
 * @return     The result of ompi_common_mx_finalize() when the endpoint was
 *             closed successfully, OMPI_ERROR otherwise.
 */
int
ompi_mtl_mx_finalize(struct mca_mtl_base_module_t* mtl) {
    mx_return_t rc;

    /* Stop the progress callback before releasing the endpoint. */
    opal_progress_unregister(ompi_mtl_mx_progress);

    /* free resources */
    rc = mx_close_endpoint(ompi_mtl_mx.mx_endpoint);
    if (MX_SUCCESS == rc) {
        return ompi_common_mx_finalize();
    }

    /* Closing failed: report the MX error and skip the common finalize. */
    opal_output(ompi_mtl_base_framework.framework_output,
                "Error in mx_close_endpoint (error %s)\n",
                mx_strerror(rc));
    return OMPI_ERROR;
}
Example #3
0
/**
 * Fault-tolerance (checkpoint/restart) event hook for the MX BTL.
 *
 * Only the checkpoint notification triggers any work: every open MX
 * endpoint owned by the component is closed and its handle reset to NULL.
 * All other states (continue, restart, term, anything else) are no-ops.
 *
 * @param state  The OPAL_CRS_* state being signalled.
 * @return       Always OMPI_SUCCESS.
 */
int mca_btl_mx_ft_event(int state) {
    int idx;

    /* Nothing to do for any state other than checkpoint. */
    if(OPAL_CRS_CHECKPOINT != state) {
        return OMPI_SUCCESS;
    }

    /* Continue must reconstruct the routes (including modex), since we
     * have to tear down the devices completely.
     * We have to do this because the MX driver can be checkpointed, but
     * cannot be restarted with BLCR due to an mmap problem. If we do not
     * close MX then BLCR throws the following error in /var/log/messages:
     *   kernel: do_mmap(<file>, 00002aaab0aac000, 0000000000400000, ...) failed: ffffffffffffffff
     *   kernel: vmadump: mmap failed: /dev/mx0
     *   kernel: blcr: thaw_threads returned error, aborting. -1
     * JJH: It may be possible to, instead of restarting the entire driver, just reconnect endpoints
     */
    ompi_cr_continue_like_restart = true;

    for( idx = 0; idx < mca_btl_mx_component.mx_num_btls; idx++ ) {
        mca_btl_mx_module_t* module = mca_btl_mx_component.mx_btls[idx];

        /* Skip modules that have no open endpoint. */
        if( NULL == module->mx_endpoint ) {
            continue;
        }

        mx_close_endpoint(module->mx_endpoint);
        module->mx_endpoint = NULL;
    }

    return OMPI_SUCCESS;
}
Example #4
0
/*
 * Entry point of the MX streaming test.
 *
 * Without -d the program runs as the receiver; with -d <host> it connects
 * to that host and runs as the sender.
 *
 * Options:
 *   -d host   remote host name (selects sender mode)
 *   -e eid    local endpoint id
 *   -f filter connection filter value
 *   -n nic    local NIC id in hex, translated to a board number
 *   -b board  local board number
 *   -r eid    remote endpoint id
 *   -l len    message length (at most MAX_LEN)
 *   -N iter   number of iterations
 *   -V        verify data (sender only; ignored on the receiver)
 *   -v        verbose output
 *   -w        use the blocking variants instead of polling
 *   -x        bi-directional mode (needs a thread-safe MX library)
 *   -h        print usage and exit
 *
 * Exits with status 0 on completion and 1 on any argument or MX setup
 * error.
 */
int 
main(int argc, char **argv)
{
	mx_endpoint_t ep;
	uint64_t nic_id;
	uint16_t my_eid;
	uint64_t his_nic_id;
	uint32_t board_id;
	uint32_t filter;
	uint16_t his_eid;
	mx_endpoint_addr_t his_addr;
	char *rem_host;
	int len;
	int iter;
	int c;
	int do_wait;
	int do_bothways;
	extern char *optarg;
	mx_return_t ret;

#if DEBUG
	extern int mx_debug_mask;
	mx_debug_mask = 0xFFF;
#endif

	/* The library must be initialized before any other MX call below. */
	ret = mx_init();
	if (ret != MX_SUCCESS) {
		fprintf(stderr, "Failed to initialize MX: %s\n",
			mx_strerror(ret));
		exit(1);
	}
	MX_MUTEX_INIT(&stream_mutex);
	/* set up defaults */
	rem_host = NULL;
	filter = FILTER;
	my_eid = DFLT_EID;
	his_eid = DFLT_EID;
	board_id = MX_ANY_NIC;
	len = DFLT_LEN;
	iter = DFLT_ITER;
	do_wait = 0;
	do_bothways = 0;
	num_threads = 1;

	/* getopt() signals the end of the options with -1 (POSIX). */
	while ((c = getopt(argc, argv, "hd:e:f:n:b:r:l:N:Vvwx")) != -1) {
		switch(c) {
		case 'd':
			rem_host = optarg;
			break;
		case 'e':
			my_eid = atoi(optarg);
			break;
		case 'f':
			filter = atoi(optarg);
			break;
		case 'n':
			/* Reject unparsable input rather than translating an
			 * uninitialized nic_id. */
			if (sscanf(optarg, "%"SCNx64, &nic_id) != 1) {
				fprintf(stderr, "Invalid NIC id '%s'\n", optarg);
				exit(1);
			}
			ret = mx_nic_id_to_board_number(nic_id, &board_id);
			if (ret != MX_SUCCESS) {
				fprintf(stderr,
					"Failed to map NIC id to board number: %s\n",
					mx_strerror(ret));
				exit(1);
			}
			break;
		case 'b':
			board_id = atoi(optarg);
			break;
		case 'r':
			his_eid = atoi(optarg);
			break;
		case 'l':
			len = atoi(optarg);
			if (len > MAX_LEN) {
				fprintf(stderr, "len too large, max is %d\n", MAX_LEN);
				exit(1);
			}
			break;
		case 'N':
			iter = atoi(optarg);
			break;
		case 'V':
			Verify = 1;
			break;
		case 'v':
			do_verbose = 1;
			break;
		case 'w':
			do_wait = 1;
			break;
		case 'x':
#if MX_THREAD_SAFE
			do_bothways = 1;
#else
			fprintf(stderr, "bi-directional mode only supported with threadsafe mx lib\n");
			exit(1);
#endif
			break;
		case 'h':
		default:
			usage();
			exit(1);
		}
	}

	/* Only the sender adds a thread for bi-directional mode. */
	if (rem_host != NULL)
		num_threads += do_bothways;
	ret = mx_open_endpoint(board_id, my_eid, filter, NULL, 0, &ep);
	if (ret != MX_SUCCESS) {
		fprintf(stderr, "Failed to open endpoint %s\n", mx_strerror(ret));
		exit(1);
	}

	/* If no host, we are receiver */
	if (rem_host == NULL) {
		if (do_verbose)
			printf("Starting streaming receiver\n");
		if (Verify) {
			fprintf(stderr, "-V ignored.  Verify must be set by sender\n");
			Verify = 0;
		}

		if (do_wait)
			receiver_blocking(ep, MATCH_VAL_MAIN, filter);
		else
			receiver_polling(ep, MATCH_VAL_MAIN, filter);

	} else {
		/* get address of destination; on failure his_nic_id / his_addr
		 * would be left unset, so stop instead of sending with them */
		ret = mx_hostname_to_nic_id(rem_host, &his_nic_id);
		if (ret != MX_SUCCESS) {
			fprintf(stderr, "Failed to resolve host %s: %s\n",
				rem_host, mx_strerror(ret));
			mx_close_endpoint(ep);
			mx_finalize();
			exit(1);
		}
		ret = mx_connect(ep, his_nic_id, his_eid, filter, 
				 MX_INFINITE, &his_addr);
		if (ret != MX_SUCCESS) {
			fprintf(stderr, "Failed to connect to host %s: %s\n",
				rem_host, mx_strerror(ret));
			mx_close_endpoint(ep);
			mx_finalize();
			exit(1);
		}
		if (do_verbose)
			printf("Starting streaming send to host %s\n", 
			       rem_host);
		if (Verify) printf("Verifying results\n");

		/* start up the sender */
		if (do_wait)
			sender_blocking(ep, his_addr, iter, len, 
					do_bothways,MATCH_VAL_MAIN);
		else
			sender_polling(ep, his_addr, iter, len, 
				       do_bothways, MATCH_VAL_MAIN);
	}

	mx_close_endpoint(ep);
	mx_finalize();
	exit(0);
}