Example #1
int mca_btl_mx_proc_connect( mca_btl_mx_endpoint_t* module_endpoint )
{
    int num_retry = 0;
    mx_return_t mx_status;
    mx_endpoint_addr_t mx_remote_addr;

    module_endpoint->status = MCA_BTL_MX_CONNECTION_PENDING;

 retry_connect:
    mx_status = mx_connect( module_endpoint->endpoint_btl->mx_endpoint,
                            module_endpoint->mx_peer->nic_id, module_endpoint->mx_peer->endpoint_id,
                            mca_btl_mx_component.mx_filter, mca_btl_mx_component.mx_timeout, &mx_remote_addr );
    if( MX_SUCCESS != mx_status ) {
        if( MX_TIMEOUT == mx_status ) {
            if( num_retry++ < mca_btl_mx_component.mx_connection_retries )
                goto retry_connect;
        }
        {
            char peer_name[MX_MAX_HOSTNAME_LEN];

            if( MX_SUCCESS != mx_nic_id_to_hostname( module_endpoint->mx_peer->nic_id, peer_name ) )
                sprintf( peer_name, "unknown %lx nic_id", (long)module_endpoint->mx_peer->nic_id );

            opal_output( 0, "mx_connect failed for %s with key %x (error %s)\n\tUnique ID (local %x remote %x)\n",
                         peer_name, mca_btl_mx_component.mx_filter, mx_strerror(mx_status),
                         module_endpoint->endpoint_btl->mx_unique_network_id,
                         module_endpoint->mx_peer->unique_network_id );
        }
        module_endpoint->status = MCA_BTL_MX_NOT_REACHEABLE;
        return OMPI_ERROR;
    }
    module_endpoint->mx_peer_addr = mx_remote_addr;
    module_endpoint->status       = MCA_BTL_MX_CONNECTED;

    return OMPI_SUCCESS;
}
Example #2
int
ompi_common_mx_initialize(void)
{
    mx_return_t mx_return;
    struct mca_mpool_base_resources_t mpool_resources;
    int index, value;
    
    ompi_common_mx_initialize_ref_cnt++;
    
    if(ompi_common_mx_initialize_ref_cnt == 1) { 
        /* Set the MX error handler to always return. This is the only MX
         * function allowed to be called before mx_init(), and it ensures
         * that if MX is not up and running the MX library does not exit
         * the application.
         */
        mx_set_error_handler(MX_ERRORS_RETURN);

        /* If we have a memory manager available, and
           mpi_leave_pinned == -1, then set mpi_leave_pinned to 1.

           We have a memory manager if:
           - we have both FREE and MUNMAP support
           - we have MUNMAP support and the linux mallopt */
        value = opal_mem_hooks_support_level();
        if ((value & (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT))
            == (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT)) {
            index = mca_base_param_find("mpi", NULL, "leave_pinned");
            if (index >= 0) {
                if ((mca_base_param_lookup_int(index, &value) == OPAL_SUCCESS)
                    && (value == -1)) {

                    ompi_mpi_leave_pinned = 1;
                    setenv("MX_RCACHE", "2", 1);
                    mpool_resources.regcache_clean = mx__regcache_clean;
                    ompi_common_mx_fake_mpool =
                        mca_mpool_base_module_create("fake", NULL, &mpool_resources);
                    if (!ompi_common_mx_fake_mpool) {
                        ompi_mpi_leave_pinned = 0;
                        setenv("MX_RCACHE", "0", 1);
                        opal_output(0, "Error creating fake mpool (error %s)\n",
                                    strerror(errno));
                    }
                }
            }
        }

        /* initialize the mx library */
        mx_return = mx_init(); 
        
        if(MX_SUCCESS != mx_return) {
            opal_output(0,
                        "Error in mx_init (error %s)\n",
                        mx_strerror(mx_return));
            return OMPI_ERR_NOT_AVAILABLE;
        }
        
    } 
    return OMPI_SUCCESS;
}
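
The comment at the top of ompi_common_mx_initialize() stresses the ordering constraint: mx_set_error_handler(MX_ERRORS_RETURN) is the only MX call permitted before mx_init(), so that a missing or misconfigured MX stack reports an error instead of terminating the process. A minimal standalone sketch of that ordering (not the Open MPI wrapper itself; the myriexpress.h header name is an assumption):

#include <stdio.h>
#include <myriexpress.h>   /* assumed MX header */

int main(void)
{
    mx_return_t rc;

    /* Keep the MX library from aborting the process if the NIC/driver is absent. */
    mx_set_error_handler(MX_ERRORS_RETURN);

    rc = mx_init();
    if (MX_SUCCESS != rc) {
        printf("mx_init failed: %s\n", mx_strerror(rc));
        return 1;                     /* MX not available; fail gracefully */
    }

    /* ... open endpoints and communicate ... */

    mx_finalize();
    return 0;
}
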
Example #3
mca_btl_base_descriptor_t* mca_btl_mx_prepare_dst( struct mca_btl_base_module_t* btl,
                                                   struct mca_btl_base_endpoint_t* endpoint,
                                                   struct mca_mpool_base_registration_t* registration,
                                                   struct ompi_convertor_t* convertor,
                                                   uint8_t order,
                                                   size_t reserve,
                                                   size_t* size,
                                                   uint32_t flags)
{
    mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*)btl;
    mca_btl_mx_frag_t* frag;
    mx_return_t mx_return;
    mx_segment_t mx_segment;
    int rc;

    MCA_BTL_MX_FRAG_ALLOC_USER(btl, frag, rc);
    if( OPAL_UNLIKELY(NULL == frag) ) {
        return NULL;
    }

    frag->segment[0].seg_len       = *size;
    ompi_convertor_get_current_pointer( convertor, (void**)&(frag->segment[0].seg_addr.pval) );
    frag->segment[0].seg_key.key64 = (uint64_t)(intptr_t)frag;

    mx_segment.segment_ptr    = frag->segment[0].seg_addr.pval;
    mx_segment.segment_length = frag->segment[0].seg_len;

    mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1,
                          frag->segment[0].seg_key.key64, 
                          BTL_MX_PUT_MASK, NULL, &(frag->mx_request) );
    if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
        opal_output( 0, "Fail to re-register a fragment with the MX NIC ...\n" );
        MCA_BTL_MX_FRAG_RETURN( btl, frag );
        return NULL;
    }

#ifdef HAVE_MX_FORGET
    {
        mx_return = mx_forget( mx_btl->mx_endpoint, &(frag->mx_request) );
        if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
            opal_output( 0, "mx_forget failed in mca_btl_mx_prepare_dst with error %d (%s)\n",
                         mx_return, mx_strerror(mx_return) );
            return NULL;
        }
    }
#endif

    /* Allow the fragment to be recycled using the mca_btl_mx_free function */
    frag->type             = MCA_BTL_MX_SEND;
    frag->base.des_dst     = frag->segment;
    frag->base.des_dst_cnt = 1;
    frag->base.des_flags   = flags;
    frag->base.order       = MCA_BTL_NO_ORDER;

    return &frag->base;
}
Example #4
int ompi_mtl_mx_module_init(){ 
    mx_param_t mx_param;
    mx_return_t mx_return;
    
    
    /* setup params */
    mx_param.key = MX_PARAM_UNEXP_QUEUE_MAX;
    mx_param.val.unexp_queue_max = ompi_mtl_mx.mx_unexp_queue_max;
    
   
    /* get a local endpoint */
    mx_return = mx_open_endpoint(MX_ANY_NIC, 
                                 MX_ANY_ENDPOINT,
                                 ompi_mtl_mx.mx_filter, 
                                 NULL, 
                                 0,
                                 &ompi_mtl_mx.mx_endpoint);
    
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_output, "Error in mx_open_endpoint (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    
    /* get the endpoint address */
    mx_return = mx_get_endpoint_addr( ompi_mtl_mx.mx_endpoint, 
                                      &ompi_mtl_mx.mx_endpoint_addr); 
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_output, "Error in mx_get_endpoint_addr (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    
    mx_return = mx_decompose_endpoint_addr( ompi_mtl_mx.mx_endpoint_addr, &(ompi_mtl_mx.mx_addr.nic_id),
                                            &(ompi_mtl_mx.mx_addr.endpoint_id) );
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_output, "Error in mx_decompose_endpoint_addr (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }

    ompi_modex_send( &mca_mtl_mx_component.super.mtl_version,
                     &ompi_mtl_mx.mx_addr,
                     sizeof(mca_mtl_mx_addr_t));
    
    /* register the mtl mx progress function */
    opal_progress_register(ompi_mtl_mx_progress);

    return OMPI_SUCCESS;
}
Example #5
int
ompi_mtl_mx_finalize(struct mca_mtl_base_module_t* mtl) { 
    mx_return_t mx_return;
    
    opal_progress_unregister(ompi_mtl_mx_progress);
    
    /* free resources */
    mx_return = mx_close_endpoint(ompi_mtl_mx.mx_endpoint);
    if(mx_return != MX_SUCCESS){ 
        opal_output(ompi_mtl_base_output, "Error in mx_close_endpoint (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    
    return ompi_common_mx_finalize();
    
}
Example #6
mca_mtl_mx_endpoint_t* mca_mtl_mx_endpoint_create(ompi_proc_t* ompi_proc) { 
    mca_mtl_mx_endpoint_t* mtl_mx_endpoint = NULL;
    int rc; 
    mca_mtl_mx_addr_t *mx_peer; 
    size_t size;
    mx_return_t mx_return;
    int num_retry = 0;
    /* get the remote proc's address (only one) */
    rc = ompi_modex_recv(&mca_mtl_mx_component.super.mtl_version, 
                                 ompi_proc, (void**)&mx_peer, &size);
    if( rc != OMPI_SUCCESS || size != sizeof(mca_mtl_mx_addr_t)) { 
        return NULL; 
    }
    
    mtl_mx_endpoint = (mca_mtl_mx_endpoint_t*) OBJ_NEW(mca_mtl_mx_endpoint_t);
    mtl_mx_endpoint->mx_peer = mx_peer;
    
 retry_connect:
    mx_return = mx_connect(ompi_mtl_mx.mx_endpoint, 
                           mx_peer->nic_id, 
                           mx_peer->endpoint_id, 
                           ompi_mtl_mx.mx_filter, 
                           ompi_mtl_mx.mx_timeout, 
                           &mtl_mx_endpoint->mx_peer_addr);
    if(MX_SUCCESS != mx_return) { 
        char peer_name[MX_MAX_HOSTNAME_LEN];
        if(MX_TIMEOUT == mx_return) { 
            if( num_retry++ < ompi_mtl_mx.mx_retries ) { 
                goto retry_connect;
            }
        }
        
        if(MX_SUCCESS != mx_nic_id_to_hostname( mx_peer->nic_id, peer_name)) { 
            sprintf( peer_name, "unknown %lx nic_id", (long)mx_peer->nic_id ); 
        }
        opal_output(ompi_mtl_base_output, 
                    "mx_connect fail for %s with key %x (error %s)\n", 
                    peer_name, ompi_mtl_mx.mx_filter, mx_strerror(mx_return) );
        return NULL;
    }

    return mtl_mx_endpoint;
}
Example #7
int
ompi_common_mx_finalize(void)
{
    mx_return_t mx_return;
    ompi_common_mx_initialize_ref_cnt--;
    if( 0 == ompi_common_mx_initialize_ref_cnt ) { 

        if (ompi_common_mx_fake_mpool) 
	  mca_mpool_base_module_destroy(ompi_common_mx_fake_mpool);
        
        mx_return = mx_finalize(); 
        if(mx_return != MX_SUCCESS){ 
            opal_output(0, "Error in mx_finalize (error %s)\n", mx_strerror(mx_return));
            return OMPI_ERROR;
        } 
    }
    return OMPI_SUCCESS;
}
Example #8
int
ompi_mtl_mx_iprobe(struct mca_mtl_base_module_t* mtl, 
                   struct ompi_communicator_t *comm,
                   int src,
                   int tag,
                   int *flag,
                   struct ompi_status_public_t *status)
{
    uint32_t result;
    mx_return_t ret;
    mx_status_t mx_status;
    uint64_t match_bits;
    uint64_t mask_bits;

    MX_SET_RECV_BITS(match_bits, 
                     mask_bits,
                     comm->c_contextid,
                     src,
                     tag);

    ret = mx_iprobe(ompi_mtl_mx.mx_endpoint,
                    match_bits,
                    mask_bits,
                    &mx_status,
                    &result);
    if (MX_SUCCESS != ret) {
        opal_output(ompi_mtl_base_output, "Error in mx_iprobe (error %s)\n", mx_strerror(ret));
        return OMPI_ERROR;
    }

    if (result) {
        if(MPI_STATUS_IGNORE != status) { 
            MX_GET_SRC(mx_status.match_info, status->MPI_SOURCE);
            MX_GET_TAG(mx_status.match_info, status->MPI_TAG); 
            status->_ucount = mx_status.msg_length;
        }
        *flag = 1;
    } else {
        *flag = 0;
    }

    return OMPI_SUCCESS;
}
Example #9
/**
 * Initiate an asynchronous put. 
 *
 * @param btl (IN)         BTL module
 * @param endpoint (IN)    BTL addressing information
 * @param descriptor (IN)  Description of the data to be transferred
 */
static int mca_btl_mx_put( struct mca_btl_base_module_t* btl,
                           struct mca_btl_base_endpoint_t* endpoint,
                           struct mca_btl_base_descriptor_t* descriptor )
{
    mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*)btl;
    mca_btl_mx_frag_t* frag = (mca_btl_mx_frag_t*)descriptor;
    mx_segment_t mx_segment[2];
    mx_return_t mx_return;
    uint32_t i = 0;

    if( OPAL_UNLIKELY(MCA_BTL_MX_CONNECTED != ((mca_btl_mx_endpoint_t*)endpoint)->status) ) {
        if( MCA_BTL_MX_NOT_REACHEABLE == ((mca_btl_mx_endpoint_t*)endpoint)->status )
            return OMPI_ERROR;
        if( MCA_BTL_MX_CONNECTION_PENDING == ((mca_btl_mx_endpoint_t*)endpoint)->status )
            return OMPI_ERR_OUT_OF_RESOURCE;
        if( OMPI_SUCCESS != mca_btl_mx_proc_connect( (mca_btl_mx_endpoint_t*)endpoint ) )
            return OMPI_ERROR;
    }

    frag->endpoint         = endpoint;
    frag->type             = MCA_BTL_MX_SEND;
    descriptor->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;

    do {
        mx_segment[i].segment_ptr    = descriptor->des_src[i].seg_addr.pval;
        mx_segment[i].segment_length = descriptor->des_src[i].seg_len;
    } while (++i < descriptor->des_src_cnt);

    mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment, descriptor->des_src_cnt,
                          endpoint->mx_peer_addr,
                          descriptor->des_dst[0].seg_key.key64, frag,
                          &frag->mx_request );
    if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
        opal_output( 0, "mx_isend fails with error %s\n", mx_strerror(mx_return) );
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
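
MX exposes only two-sided send/receive, so the BTL emulates a put with a matched pair: the target side (mca_btl_mx_prepare_dst, Example #3) posts an mx_irecv whose 64-bit match value is the destination fragment pointer, and the origin side (mca_btl_mx_put above) issues an mx_isend carrying that same key, taken from des_dst[0].seg_key.key64. A hedged sketch of just that pairing, with hypothetical endpoints and buffers, and a full match mask standing in for BTL_MX_PUT_MASK:

#include <stdint.h>
#include <myriexpress.h>   /* assumed MX header */

/* Hypothetical helper showing the keyed irecv/isend pair behind the put emulation. */
static void emulated_put(mx_endpoint_t ep_target, mx_endpoint_t ep_origin,
                         mx_endpoint_addr_t target_addr,
                         void *dst_buf, void *src_buf, uint32_t len)
{
    uint64_t     key = (uint64_t)(intptr_t)dst_buf;   /* arbitrary 64-bit cookie */
    mx_segment_t rseg, sseg;
    mx_request_t rreq, sreq;

    /* Target side: expose dst_buf under the key; a full mask means the
     * sender must present exactly this key (the BTL uses BTL_MX_PUT_MASK). */
    rseg.segment_ptr    = dst_buf;
    rseg.segment_length = len;
    mx_irecv(ep_target, &rseg, 1, key, UINT64_C(0xffffffffffffffff), NULL, &rreq);

    /* Origin side: the "put" is just a send matched on the same key. */
    sseg.segment_ptr    = src_buf;
    sseg.segment_length = len;
    mx_isend(ep_origin, &sseg, 1, target_addr, key, NULL, &sreq);

    /* Completion of both requests would be observed via mx_test()/mx_ipeek(),
     * as the surrounding BTL/MTL code does. */
}
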
Example #10
int 
main(int argc, char **argv)
{
	mx_endpoint_t ep;
	uint64_t nic_id;
	uint16_t my_eid;
	uint64_t his_nic_id;
	uint32_t board_id;
	uint32_t filter;
	uint16_t his_eid;
	mx_endpoint_addr_t his_addr;
	char *rem_host;
	int len;
	int iter;
	int c;
	int do_wait;
	int do_bothways;
	extern char *optarg;
	mx_return_t ret;

#if DEBUG
	extern int mx_debug_mask;
	mx_debug_mask = 0xFFF;
#endif

	mx_init();
	MX_MUTEX_INIT(&stream_mutex);
	/* set up defaults */
	rem_host = NULL;
	filter = FILTER;
	my_eid = DFLT_EID;
	his_eid = DFLT_EID;
	board_id = MX_ANY_NIC;
	len = DFLT_LEN;
	iter = DFLT_ITER;
	do_wait = 0;
	do_bothways = 0;
	num_threads = 1;

	while ((c = getopt(argc, argv, "hd:e:f:n:b:r:l:N:Vvwx")) != EOF) switch(c) {
	case 'd':
		rem_host = optarg;
		break;
	case 'e':
		my_eid = atoi(optarg);
		break;
	case 'f':
		filter = atoi(optarg);
		break;
	case 'n':
		sscanf(optarg, "%"SCNx64, &nic_id);
		mx_nic_id_to_board_number(nic_id, &board_id);
		break;
	case 'b':
		board_id = atoi(optarg);
		break;
	case 'r':
		his_eid = atoi(optarg);
		break;
	case 'l':
		len = atoi(optarg);
		if (len > MAX_LEN) {
			fprintf(stderr, "len too large, max is %d\n", MAX_LEN);
			exit(1);
		}
		break;
	case 'N':
		iter = atoi(optarg);
		break;
	case 'V':
		Verify = 1;
		break;
	case 'v':
		do_verbose = 1;
		break;
	case 'w':
		do_wait = 1;
		break;
	case 'x':
#if MX_THREAD_SAFE
		do_bothways = 1;
#else
		fprintf(stderr, "bi-directional mode only supported with threadsafe mx lib\n");
		exit(1);
#endif
		break;
	case 'h':
	default:
		usage();
		exit(1);
	}

	if (rem_host != NULL)
		num_threads += do_bothways;
	ret = mx_open_endpoint(board_id, my_eid, filter, NULL, 0, &ep);
	if (ret != MX_SUCCESS) {
		fprintf(stderr, "Failed to open endpoint %s\n", mx_strerror(ret));
		exit(1);
	}

	/* If no host, we are receiver */
	if (rem_host == NULL) {
		if (do_verbose)
			printf("Starting streaming receiver\n");
		if (Verify) {
			fprintf(stderr, "-V ignored.  Verify must be set by sender\n");
			Verify = 0;
		}

		if (do_wait)
			receiver_blocking(ep, MATCH_VAL_MAIN, filter);
		else
			receiver_polling(ep, MATCH_VAL_MAIN, filter);
		

	} else {
		/* get address of destination */
		mx_hostname_to_nic_id(rem_host, &his_nic_id);
		mx_connect(ep, his_nic_id, his_eid, filter, 
			   MX_INFINITE, &his_addr);
		if (do_verbose)
			printf("Starting streaming send to host %s\n", 
			       rem_host);
		if (Verify) printf("Verifying results\n");

		/* start up the sender */
		if (do_wait)
			sender_blocking(ep, his_addr, iter, len, 
					do_bothways,MATCH_VAL_MAIN);
		else
			sender_polling(ep, his_addr, iter, len, 
				       do_bothways, MATCH_VAL_MAIN);
	}		

  
	mx_close_endpoint(ep);
	mx_finalize();
	exit(0);
}
Example #11
int
ompi_mtl_mx_isend(struct mca_mtl_base_module_t* mtl, 
                  struct ompi_communicator_t* comm,
                  int dest,
                  int tag,
                  struct opal_convertor_t *convertor,
                  mca_pml_base_send_mode_t mode,
                  bool blocking,
                  mca_mtl_request_t * mtl_request)
{
    mx_return_t mx_return;
    uint64_t match_bits;
    mca_mtl_mx_request_t * mtl_mx_request = (mca_mtl_mx_request_t*) mtl_request;
    size_t length;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_mx_endpoint_t* mx_endpoint = (mca_mtl_mx_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
    char* where;

    assert(mtl == &ompi_mtl_mx.super);

    MX_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag); 
    
    ompi_mtl_datatype_pack(convertor, 
                           &mtl_mx_request->mx_segment[0].segment_ptr, 
                           &length,
                           &mtl_mx_request->free_after);
    mtl_mx_request->mx_segment[0].segment_length = length;
    mtl_mx_request->convertor = convertor;
    mtl_mx_request->type = OMPI_MTL_MX_ISEND;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "issend bits: 0x%016" PRIu64 "\n", match_bits));

    if(mode == MCA_PML_BASE_SEND_SYNCHRONOUS) { 
        mx_return = mx_issend( ompi_mtl_mx.mx_endpoint, 
                               mtl_mx_request->mx_segment, 
                               1,
                               mx_endpoint->mx_peer_addr, 
                               match_bits, 
                               mtl_mx_request, 
                               &mtl_mx_request->mx_request
                               );
        where = "mx_issend";
    } else { 
        mx_return = mx_isend( ompi_mtl_mx.mx_endpoint, 
                              mtl_mx_request->mx_segment,
                              1,
                              mx_endpoint->mx_peer_addr,
                              match_bits,
                              mtl_mx_request,
                              &mtl_mx_request->mx_request
                              );
        where = "mx_isend";
    }
    if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) { 
        char peer_name[MX_MAX_HOSTNAME_LEN];
        if(MX_SUCCESS != mx_nic_id_to_hostname( mx_endpoint->mx_peer->nic_id, peer_name)) { 
            sprintf( peer_name, "unknown %lx nic_id", (long)mx_endpoint->mx_peer->nic_id ); 
        }
        opal_output(ompi_mtl_base_framework.framework_output, "Error in %s (error %s) sending to %s\n",
                    where, mx_strerror(mx_return), peer_name);
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
Example #12
int mca_btl_mx_send( struct mca_btl_base_module_t* btl,
                     struct mca_btl_base_endpoint_t* endpoint,
                     struct mca_btl_base_descriptor_t* descriptor, 
                     mca_btl_base_tag_t tag )
   
{
    mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*)btl;
    mca_btl_mx_frag_t* frag = (mca_btl_mx_frag_t*)descriptor;
    mx_segment_t mx_segment[2];
    mx_return_t mx_return;
    uint64_t total_length = 0, tag64;
    uint32_t i = 0;
    int btl_ownership = (descriptor->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

    if( OPAL_UNLIKELY(MCA_BTL_MX_CONNECTED != ((mca_btl_mx_endpoint_t*)endpoint)->status) ) {
        if( MCA_BTL_MX_NOT_REACHEABLE == ((mca_btl_mx_endpoint_t*)endpoint)->status )
            return OMPI_ERROR;
        if( MCA_BTL_MX_CONNECTION_PENDING == ((mca_btl_mx_endpoint_t*)endpoint)->status )
            return OMPI_ERR_OUT_OF_RESOURCE;
        if( OMPI_SUCCESS != mca_btl_mx_proc_connect( (mca_btl_mx_endpoint_t*)endpoint ) )
            return OMPI_ERROR;
    }

    frag->endpoint  = endpoint;
    frag->type      = MCA_BTL_MX_SEND;

    do {
        mx_segment[i].segment_ptr    = descriptor->des_src[i].seg_addr.pval;
        mx_segment[i].segment_length = descriptor->des_src[i].seg_len;
        total_length += descriptor->des_src[i].seg_len;
    } while (++i < descriptor->des_src_cnt);

    tag64 = 0x01ULL | (((uint64_t)tag) << 8);
    mx_return = mx_isend( mx_btl->mx_endpoint, mx_segment, descriptor->des_src_cnt,
                          endpoint->mx_peer_addr,
                          tag64, frag, &frag->mx_request );
    if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
        opal_output( 0, "mx_isend fails with error %s\n", mx_strerror(mx_return) );
        return OMPI_ERROR;
    }

#ifdef HAVE_MX_FORGET
    {
        uint32_t mx_result;
        mx_return = mx_ibuffered( mx_btl->mx_endpoint, &(frag->mx_request), &mx_result );
        if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
            opal_output( 0, "mx_ibuffered failed with error %d (%s)\n",
                         mx_return, mx_strerror(mx_return) );
            frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
            return OMPI_ERROR;
        }
        if( mx_result ) {
            mx_return = mx_forget( mx_btl->mx_endpoint, &(frag->mx_request) );
            if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
                opal_output( 0, "mx_forget failed with error %d (%s)\n",
                             mx_return, mx_strerror(mx_return) );
                frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
                return OMPI_SUCCESS;
            }

            if( MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ) {
                frag->base.des_cbfunc( &(mx_btl->super), frag->endpoint,
                                       &(frag->base), OMPI_SUCCESS);
            }
            if( btl_ownership ) {
                MCA_BTL_MX_FRAG_RETURN( mx_btl, frag );
            }
            return 1;
        }
    }
#endif
    if( 2048 > total_length ) {
        mx_status_t mx_status;
        uint32_t mx_result;

        /* let's check for completeness */
        mx_return = mx_test( mx_btl->mx_endpoint, &(frag->mx_request),
                             &mx_status, &mx_result );
        if( OPAL_LIKELY(MX_SUCCESS == mx_return) ) {
            if( mx_result ) {
                if( MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ) {
                    frag->base.des_cbfunc( &(mx_btl->super), frag->endpoint,
                                           &(frag->base), OMPI_SUCCESS);
                }
                if( btl_ownership ) {
                    MCA_BTL_MX_FRAG_RETURN( mx_btl, frag );
                }
                return 1;
            }
        }
    }
    frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;

    return OMPI_SUCCESS;
}
Example #13
/**
 * Initiate an inline send to the peer. On failure, return a descriptor.
 *
 * @param btl (IN)      BTL module
 * @param peer (IN)     BTL peer addressing
 */
static int mca_btl_mx_sendi( struct mca_btl_base_module_t* btl,
                             struct mca_btl_base_endpoint_t* endpoint,
                             struct ompi_convertor_t* convertor,
                             void* header,
                             size_t header_size,
                             size_t payload_size,
                             uint8_t order,
                             uint32_t flags,
                             mca_btl_base_tag_t tag,
                             mca_btl_base_descriptor_t** descriptor )
{
    mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*) btl; 
    size_t max_data;
    
    if( OPAL_UNLIKELY(MCA_BTL_MX_CONNECTED != ((mca_btl_mx_endpoint_t*)endpoint)->status) ) {
        if( MCA_BTL_MX_NOT_REACHEABLE == ((mca_btl_mx_endpoint_t*)endpoint)->status )
            return OMPI_ERROR;
        if( MCA_BTL_MX_CONNECTION_PENDING == ((mca_btl_mx_endpoint_t*)endpoint)->status )
            return OMPI_ERR_OUT_OF_RESOURCE;
        if( OMPI_SUCCESS != mca_btl_mx_proc_connect( (mca_btl_mx_endpoint_t*)endpoint ) )
            return OMPI_ERROR;
    }
    
    if( !ompi_convertor_need_buffers(convertor) ) {
        uint32_t mx_segment_count = 0;
        uint64_t tag64 = 0x01ULL | (((uint64_t)tag) << 8);
        mx_return_t mx_return;
        mx_request_t mx_request;
        mx_segment_t mx_segments[2], *mx_segment = mx_segments;
        
        if( 0 != header_size ) {
            mx_segment->segment_ptr    = header;
            mx_segment->segment_length = header_size;
            mx_segment++;
            mx_segment_count++;
        }
        if( 0 != payload_size ) {
            struct iovec iov;
            uint32_t iov_count = 1;
            
            iov.iov_base = NULL;
            iov.iov_len = payload_size;
            
            (void)ompi_convertor_pack( convertor, &iov, &iov_count, &max_data );
            assert( max_data == payload_size );
            
            mx_segment->segment_ptr    = iov.iov_base;
            mx_segment->segment_length = max_data;
            mx_segment_count++;
        }
        
        mx_return = mx_isend( mx_btl->mx_endpoint, mx_segments, mx_segment_count,
                              endpoint->mx_peer_addr, tag64, NULL, &mx_request );
        if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
            opal_output( 0, "mx_isend fails with error %s\n", mx_strerror(mx_return) );
            return OMPI_ERROR;
        }
#ifdef HAVE_MX_FORGET
        {
            uint32_t mx_result;
            mx_return = mx_ibuffered( mx_btl->mx_endpoint, &mx_request, &mx_result );
            if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
                opal_output( 0, "mx_ibuffered failed with error %d (%s)\n",
                             mx_return, mx_strerror(mx_return) );
                return OMPI_SUCCESS;
            }
            if( mx_result ) {
                mx_return = mx_forget( mx_btl->mx_endpoint, &mx_request );
                if( OPAL_UNLIKELY(MX_SUCCESS != mx_return) ) {
                    opal_output( 0, "mx_forget failed with error %d (%s)\n",
                                 mx_return, mx_strerror(mx_return) );
                }
            }
            return OMPI_SUCCESS;
        }
#endif
    }
    /* No optimization on this path. Just allocate a descriptor and return it
     * to the user.
     */
    *descriptor = mca_btl_mx_alloc( btl, endpoint, order,
                                    header_size + payload_size, flags );
    return OMPI_ERR_RESOURCE_BUSY;
}
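
When the convertor does not require buffering, mca_btl_mx_sendi() avoids a copy into an eager fragment by describing the BTL header and the packed payload as two MX segments and handing them to a single mx_isend. A sketch of that two-segment send with placeholder arguments (the endpoint, peer address, and buffers are assumptions, not BTL state):

#include <stdint.h>
#include <myriexpress.h>   /* assumed MX header */

/* Hypothetical helper: send a small header plus a payload as one MX message. */
static mx_return_t send_header_and_payload(mx_endpoint_t ep, mx_endpoint_addr_t peer,
                                           void *hdr, uint32_t hdr_len,
                                           void *payload, uint32_t payload_len,
                                           uint64_t tag64, mx_request_t *req)
{
    mx_segment_t segs[2];
    uint32_t     count = 0;

    if (0 != hdr_len) {                  /* first segment: the header */
        segs[count].segment_ptr    = hdr;
        segs[count].segment_length = hdr_len;
        count++;
    }
    if (0 != payload_len) {              /* second segment: the payload */
        segs[count].segment_ptr    = payload;
        segs[count].segment_length = payload_len;
        count++;
    }
    /* A single isend covers both segments; the receiver sees one message
     * matched on tag64, exactly as with the one-segment sends above. */
    return mx_isend(ep, segs, count, peer, tag64, NULL, req);
}
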
Example #14
int
ompi_mtl_mx_send(struct mca_mtl_base_module_t* mtl, 
                 struct ompi_communicator_t* comm,
                 int dest,
                 int tag,
                 struct opal_convertor_t *convertor,
                 mca_pml_base_send_mode_t mode)
{
    mx_return_t mx_return;
    uint64_t match_bits;
    mca_mtl_mx_request_t mtl_mx_request;
    size_t length;
    mx_status_t mx_status;
    uint32_t result;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_mx_endpoint_t* mx_endpoint = (mca_mtl_mx_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
    char* where;

    assert(mtl == &ompi_mtl_mx.super);

    MX_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag); 
    
    ompi_mtl_datatype_pack(convertor, 
                           &mtl_mx_request.mx_segment[0].segment_ptr, 
                           &length,
                           &mtl_mx_request.free_after);

    mtl_mx_request.mx_segment[0].segment_length = length;
    mtl_mx_request.convertor = convertor;
    mtl_mx_request.type = OMPI_MTL_MX_ISEND;

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "issend bits: 0x%016" PRIu64 "\n", 
                         match_bits));

    if(mode == MCA_PML_BASE_SEND_SYNCHRONOUS) { 
        mx_return = mx_issend( ompi_mtl_mx.mx_endpoint, 
                               mtl_mx_request.mx_segment, 
                               1,
                               mx_endpoint->mx_peer_addr, 
                               match_bits, 
                               &mtl_mx_request, 
                               &mtl_mx_request.mx_request
                               );
        where = "mx_issend";
    } else { 
        mx_return = mx_isend( ompi_mtl_mx.mx_endpoint, 
                              mtl_mx_request.mx_segment,
                              1,
                              mx_endpoint->mx_peer_addr,
                              match_bits,
                              &mtl_mx_request,
                              &mtl_mx_request.mx_request
                              );
        where = "mx_isend";
    }
    if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) { 
        char peer_name[MX_MAX_HOSTNAME_LEN];
        if(MX_SUCCESS != mx_nic_id_to_hostname( mx_endpoint->mx_peer->nic_id, peer_name)) { 
            sprintf( peer_name, "unknown %lx nic_id", (long)mx_endpoint->mx_peer->nic_id ); 
        }
        opal_output(ompi_mtl_base_framework.framework_output, "Error in %s (error %s) sending to %s\n",
                    where, mx_strerror(mx_return), peer_name);

        /* Free buffer if needed */
        if(mtl_mx_request.free_after) { 
            free(mtl_mx_request.mx_segment[0].segment_ptr);
        }
        return OMPI_ERROR;
    }
    
    do { 
        mx_return = mx_test(ompi_mtl_mx.mx_endpoint, 
                            &mtl_mx_request.mx_request,
                            &mx_status,
                            &result);
        if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) { 
            opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_wait (error %s)\n", mx_strerror(mx_return));
            abort();
        }
        if( OPAL_UNLIKELY(result && mx_status.code != MX_STATUS_SUCCESS) ) { 
            opal_output(ompi_mtl_base_framework.framework_output, 
                        "Error in ompi_mtl_mx_send, mx_wait returned something other than MX_STATUS_SUCCESS: mx_status.code = %d.\n", 
                        mx_status.code);
            abort();
        }
    } while(!result);

    /* Free buffer if needed */
    if(mtl_mx_request.free_after) { 
        free(mtl_mx_request.mx_segment[0].segment_ptr);
    }
    
    return OMPI_SUCCESS;
}
Example #15
int MPID_nem_mx_cancel_recv(MPIDI_VC_t *vc, MPID_Request *rreq)
{
    mx_request_t *mx_request = NULL;
    mx_return_t   ret;
    uint32_t      result;
    int           mpi_errno = MPI_SUCCESS;
    int           handled = FALSE;
   
    mx_request = &(REQ_FIELD(rreq,mx_request));
    /* FIXME this test is probably not correct with multiple netmods        */
    /* We need to know to which netmod a recv request actually "belongs" to */
    if(mx_request != NULL)
    {
       ret = mx_cancel(MPID_nem_mx_local_endpoint,mx_request,&result);
       MPIU_ERR_CHKANDJUMP1(ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_cancel", "**mx_cancel %s", mx_strerror(ret));
       
       if (result)
       {
          int found;
          rreq->status.cancelled = TRUE;
          found = MPIDI_CH3U_Recvq_DP(rreq);
          MPIU_Assert(found);
          rreq->status.count = 0;
          MPID_REQUEST_SET_COMPLETED(rreq);
          MPID_Request_release(rreq);
       }
       else
       {
          rreq->status.cancelled = FALSE;
          MPIU_DBG_MSG_P(CH3_OTHER,VERBOSE,
                         "request 0x%08x already matched, unable to cancel", rreq->handle);
       }
       handled = TRUE;
    }
   
 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #16
int ompi_mtl_mx_progress( void ) { 
    mx_return_t mx_return;
    mx_request_t mx_request;
    mx_status_t mx_status;
    uint32_t result;
    mca_mtl_mx_request_t* mtl_mx_request;
    int completed = 0;
    
    while(1){
        mx_return = mx_ipeek(ompi_mtl_mx.mx_endpoint, 
                             &mx_request, 
                             &result);
        
        if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) { 
            opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_ipeek (error %s)\n", mx_strerror(mx_return));
        }
        if(result) { 
            completed++;
            mx_return = mx_test(ompi_mtl_mx.mx_endpoint, 
                                &mx_request, 
                                &mx_status,
                                &result);
            if( OPAL_UNLIKELY(mx_return != MX_SUCCESS) ) { 
                opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_test (error %s)\n", mx_strerror(mx_return));
                abort();
            }
            if( OPAL_UNLIKELY(0 == result) ) { 
                opal_output(ompi_mtl_base_framework.framework_output, "Error in ompi_mtl_mx_progress, mx_ipeek returned a request, mx_test on the request resulted failure.\n");
                abort();
            }
            mtl_mx_request = (mca_mtl_mx_request_t*) mx_status.context;
            if(OMPI_MTL_MX_ISEND == mtl_mx_request->type) { 
                if(mtl_mx_request->free_after) { 
                    free(mtl_mx_request->mx_segment[0].segment_ptr);
                }
            } else {
                assert( OMPI_MTL_MX_IRECV == mtl_mx_request->type );
                
                ompi_mtl_datatype_unpack(mtl_mx_request->convertor, 
                                         mtl_mx_request->mx_segment[0].segment_ptr, 
                                         mx_status.xfer_length);
                /* set the status */
                MX_GET_SRC(mx_status.match_info,
                           mtl_mx_request->super.ompi_req->req_status.MPI_SOURCE);
                MX_GET_TAG(mx_status.match_info,
                           mtl_mx_request->super.ompi_req->req_status.MPI_TAG); 
                mtl_mx_request->super.ompi_req->req_status._ucount = 
                    mx_status.xfer_length;
            }
            /* suppose everything went just fine ... */
            mtl_mx_request->super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
            if( OPAL_UNLIKELY(MX_STATUS_SUCCESS != mx_status.code) ) {
                if( MX_STATUS_TRUNCATED == mx_status.code ) {
                    mtl_mx_request->super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
                } else {
                    mtl_mx_request->super.ompi_req->req_status.MPI_ERROR = MPI_ERR_INTERN;
                }
                return completed;
            }
            mtl_mx_request->super.completion_callback(&mtl_mx_request->super);
            return completed;
        } else { 
            return completed;
        }
    }
}
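
The loop above is the usual MX completion-draining pattern: mx_ipeek() asks whether any request posted on the endpoint has completed, and mx_test() on the returned handle retrieves its mx_status (match_info, xfer_length, code) together with the context pointer supplied when the request was posted. A stripped-down sketch of the same pattern outside the MTL machinery (the endpoint and the reporting are assumptions):

#include <stdio.h>
#include <myriexpress.h>   /* assumed MX header */

/* Hypothetical helper: drain every already-completed request on an endpoint. */
static int drain_completions(mx_endpoint_t ep)
{
    int drained = 0;

    for (;;) {
        mx_request_t req;
        mx_status_t  status;
        uint32_t     ready = 0;

        /* Is there a completed request waiting on this endpoint? */
        if (MX_SUCCESS != mx_ipeek(ep, &req, &ready) || !ready)
            return drained;

        /* Fetch its status; the context passed at post time comes back in
         * status.context (the MTL stores its request structure there). */
        if (MX_SUCCESS != mx_test(ep, &req, &status, &ready) || !ready)
            return drained;

        printf("completed: %u of %u bytes, match bits 0x%llx\n",
               (unsigned)status.xfer_length, (unsigned)status.msg_length,
               (unsigned long long)status.match_info);
        drained++;
    }
}
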
Example #17
int
MPID_nem_mx_vc_init (MPIDI_VC_t *vc)
{
   uint32_t threshold;
   MPIDI_CH3I_VC *vc_ch = VC_CH(vc);
   int mpi_errno = MPI_SUCCESS;

   /* first make sure that our private fields in the vc fit into the area provided  */
   MPIU_Assert(sizeof(MPID_nem_mx_vc_area) <= MPID_NEM_VC_NETMOD_AREA_LEN);

#ifdef ONDEMAND
   VC_FIELD(vc, local_connected)  = 0;
   VC_FIELD(vc, remote_connected) = 0;
#else
   {
       char *business_card;
       int   val_max_sz;
       int   ret;
#ifdef USE_PMI2_API
       val_max_sz = PMI2_MAX_VALLEN;
#else
       mpi_errno = PMI_KVS_Get_value_length_max(&val_max_sz);
#endif 
       business_card = (char *)MPIU_Malloc(val_max_sz); 
       mpi_errno = vc->pg->getConnInfo(vc->pg_rank, business_card,val_max_sz, vc->pg);
       if (mpi_errno) MPIU_ERR_POP(mpi_errno);
       
       mpi_errno = MPID_nem_mx_get_from_bc (business_card, &VC_FIELD(vc, remote_endpoint_id), &VC_FIELD(vc, remote_nic_id));
       if (mpi_errno)    MPIU_ERR_POP (mpi_errno);

       MPIU_Free(business_card);
       
       ret = mx_connect(MPID_nem_mx_local_endpoint,VC_FIELD(vc, remote_nic_id),VC_FIELD(vc, remote_endpoint_id),
			MPID_NEM_MX_FILTER,MX_INFINITE,&(VC_FIELD(vc, remote_endpoint_addr)));
       MPIU_ERR_CHKANDJUMP1 (ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_connect", "**mx_connect %s", mx_strerror (ret));
       mx_set_endpoint_addr_context(VC_FIELD(vc, remote_endpoint_addr),(void *)vc);

       MPIDI_CHANGE_VC_STATE(vc, ACTIVE);
   }
#endif
   mx_get_info(MPID_nem_mx_local_endpoint, MX_COPY_SEND_MAX, NULL, 0, &threshold, sizeof(uint32_t));

   vc->eager_max_msg_sz = threshold;
   vc->rndvSend_fn      = NULL;
   vc->sendNoncontig_fn = MPID_nem_mx_SendNoncontig;
   vc->comm_ops         = &comm_ops;
 
   vc_ch->iStartContigMsg = MPID_nem_mx_iStartContigMsg;
   vc_ch->iSendContig     = MPID_nem_mx_iSendContig;

 fn_exit:
   return mpi_errno;
 fn_fail:
   goto fn_exit;
}
Example #18
static int init_mx( MPIDI_PG_t *pg_p )
{
   mx_endpoint_addr_t local_endpoint_addr;
   mx_return_t        ret;
   mx_param_t         param;
   int                mpi_errno = MPI_SUCCESS;
   int                r;

   r = MPL_putenv("MX_DISABLE_SHARED=1");
   MPIU_ERR_CHKANDJUMP(r, mpi_errno, MPI_ERR_OTHER, "**putenv");
   r = MPL_putenv("MX_DISABLE_SELF=1");
   MPIU_ERR_CHKANDJUMP(r, mpi_errno, MPI_ERR_OTHER, "**putenv");

   ret = mx_init();
   MPIU_ERR_CHKANDJUMP1 (ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_init", "**mx_init %s", mx_strerror (ret));
   
   mx_set_error_handler(MX_ERRORS_RETURN);

   /*
   ret = mx_get_info(NULL, MX_NIC_COUNT, NULL, 0, &nic_count, sizeof(int));
   MPIU_ERR_CHKANDJUMP1 (ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_get_info", "**mx_get_info %s", mx_strerror (ret));
   
   count = ++nic_count;
   mx_nics = (uint64_t *)MPIU_Malloc(count*sizeof(uint64_t));
   ret = mx_get_info(NULL, MX_NIC_IDS, NULL, 0, mx_nics, count*sizeof(uint64_t));
   MPIU_ERR_CHKANDJUMP1 (ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_get_info", "**mx_get_info %s", mx_strerror (ret));
    
    do{	     
      ret = mx_nic_id_to_board_number(mx_nics[index],&mx_board_num);
      index++;
   }while(ret != MX_SUCCESS);
   */
#ifndef USE_CTXT_AS_MARK
   param.key = MX_PARAM_CONTEXT_ID;
   param.val.context_id.bits  = NEM_MX_MATCHING_BITS - SHIFT_TYPE;
   param.val.context_id.shift = SHIFT_TYPE;
   ret = mx_open_endpoint(MX_ANY_NIC,MX_ANY_ENDPOINT,MPID_NEM_MX_FILTER,&param,1,&MPID_nem_mx_local_endpoint);
#else
   ret = mx_open_endpoint(MX_ANY_NIC,MX_ANY_ENDPOINT,MPID_NEM_MX_FILTER,NULL,0,&MPID_nem_mx_local_endpoint);
#endif
   MPIU_ERR_CHKANDJUMP1 (ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_open_endpoint", "**mx_open_endpoint %s", mx_strerror (ret));
      
   ret = mx_get_endpoint_addr(MPID_nem_mx_local_endpoint,&local_endpoint_addr);
   MPIU_ERR_CHKANDJUMP1 (ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_get_endpoint_addr", "**mx_get_endpoint_addr %s", mx_strerror (ret));   
   
   ret = mx_decompose_endpoint_addr(local_endpoint_addr,&MPID_nem_mx_local_nic_id,&MPID_nem_mx_local_endpoint_id);
   MPIU_ERR_CHKANDJUMP1 (ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_decompose_endpoint_addr", "**mx_decompose_endpoint_addr %s", mx_strerror (ret));
   
 fn_exit:
   return mpi_errno;
 fn_fail:
   goto fn_exit;
}
Example #19
int mca_btl_mx_register( struct mca_btl_base_module_t* btl, 
                         mca_btl_base_tag_t tag, 
                         mca_btl_base_module_recv_cb_fn_t cbfunc, 
                         void* cbdata )
{
    mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*) btl; 

#if 0
    if( (NULL != cbfunc) && ( 0 == mca_btl_mx_component.mx_use_unexpected) ) {
#endif
    if( NULL != cbfunc ) {
        mca_btl_mx_frag_t* frag;
        mx_return_t mx_return;
        mx_segment_t mx_segment;
        int i, rc;

        /* Post the receives if there is no unexpected handler */
        for( i = 0; i < mca_btl_mx_component.mx_max_posted_recv; i++ ) {
            MCA_BTL_MX_FRAG_ALLOC_EAGER( mx_btl, frag, rc );
            if( NULL == frag ) {
                opal_output( 0, "mca_btl_mx_register: unable to allocate more eager fragments\n" );
                if( 0 == i ) {
                    return OMPI_ERROR;
                }
                break;  /* some fragments are already registered. Try to continue... */
            }
            frag->base.des_dst     = frag->segment;
            frag->base.des_dst_cnt = 1;
            frag->base.des_src     = NULL;
            frag->base.des_src_cnt = 0;
            frag->mx_frag_list     = NULL;
            frag->type             = MCA_BTL_MX_RECV;
            
            mx_segment.segment_ptr    = (void*)(frag+1);
            mx_segment.segment_length = mx_btl->super.btl_eager_limit;
            mx_return = mx_irecv( mx_btl->mx_endpoint, &mx_segment, 1,
                                  0x01ULL, BTL_MX_RECV_MASK,
                                  frag, &(frag->mx_request) );
            if( MX_SUCCESS != mx_return ) {
                opal_output( 0, "mca_btl_mx_register: mx_irecv failed with status %d (%s)\n",
                             mx_return, mx_strerror(mx_return) );
                MCA_BTL_MX_FRAG_RETURN( mx_btl, frag );
                return OMPI_ERROR;
            }
        }
    }

    return OMPI_SUCCESS;
}


/**
 * Allocate a segment.
 *
 * @param btl (IN)      BTL module
 * @param size (IN)     Request segment size.
 */

mca_btl_base_descriptor_t* mca_btl_mx_alloc( struct mca_btl_base_module_t* btl,
                                             struct mca_btl_base_endpoint_t* endpoint,
                                             uint8_t order,
                                             size_t size,
                                             uint32_t flags)
{
    mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*) btl; 
    mca_btl_mx_frag_t* frag;
    int rc;
    
    MCA_BTL_MX_FRAG_ALLOC_EAGER(mx_btl, frag, rc);
    if( OPAL_UNLIKELY(NULL == frag) ) {
        return NULL;
    }
    frag->segment[0].seg_len = 
        size <= mx_btl->super.btl_eager_limit ? 
        size : mx_btl->super.btl_eager_limit ;
    frag->segment[0].seg_addr.pval = (void*)(frag+1);
    frag->base.des_src = frag->segment;
    frag->base.des_src_cnt = 1;
    frag->base.des_flags = flags;
    frag->base.order = MCA_BTL_NO_ORDER;

    return (mca_btl_base_descriptor_t*)frag;
}
Example #20
int ompi_mtl_mx_module_init(){ 
    mx_param_t mx_param;
    mx_return_t mx_return;
    int32_t nic, ep;
    
    /* setup params */
    mx_param.key = MX_PARAM_UNEXP_QUEUE_MAX;
    mx_param.val.unexp_queue_max = ompi_mtl_mx.mx_unexp_queue_max;
    
    /* get a local endpoint */
    nic = ompi_mtl_mx.mx_board_num;
    if (nic < 0) {
      nic = MX_ANY_NIC;
    }
    ep = ompi_mtl_mx.mx_endpoint_num;
    if (ep < 0) {
      ep = MX_ANY_ENDPOINT;
    }
    mx_return = mx_open_endpoint(nic,
                                 ep,
                                 ompi_mtl_mx.mx_filter, 
                                 NULL, 
                                 0,
                                 &ompi_mtl_mx.mx_endpoint);
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_open_endpoint (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    
    /* get the endpoint address */
    mx_return = mx_get_endpoint_addr( ompi_mtl_mx.mx_endpoint, 
                                      &ompi_mtl_mx.mx_endpoint_addr); 
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_get_endpoint_addr (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    
    mx_return = mx_decompose_endpoint_addr( ompi_mtl_mx.mx_endpoint_addr, &(ompi_mtl_mx.mx_addr.nic_id),
                                            &(ompi_mtl_mx.mx_addr.endpoint_id) );
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_decompose_endpoint_addr (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    opal_output_verbose(10, ompi_mtl_base_framework.framework_output,
                        "mtl:mx: local nic %d, endpoint %d, got nic %d, ep %d\n", nic, ep,
                        (int)ompi_mtl_mx.mx_addr.nic_id,
                        ompi_mtl_mx.mx_addr.endpoint_id);

    ompi_modex_send( &mca_mtl_mx_component.super.mtl_version,
                     &ompi_mtl_mx.mx_addr,
                     sizeof(mca_mtl_mx_addr_t));
    
    /* register the mtl mx progress function */
    opal_progress_register(ompi_mtl_mx_progress);
    
    return OMPI_SUCCESS; 
}
Example #21
int MPID_nem_mx_cancel_send(MPIDI_VC_t *vc, MPID_Request *sreq)
{
    mx_request_t *mx_request = NULL;
    mx_return_t ret;
    uint32_t    result;
    int mpi_errno = MPI_SUCCESS;
    int handled = FALSE;
   
    if (!VC_CH(vc)->is_local)
    {
       mx_request = &(REQ_FIELD(sreq,mx_request));
       ret = mx_cancel(MPID_nem_mx_local_endpoint,mx_request,&result);
       MPIU_ERR_CHKANDJUMP1(ret != MX_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**mx_cancel", "**mx_cancel %s", mx_strerror(ret));

       if (result)
       {
          sreq->status.cancelled = TRUE;
          sreq->cc = 0;
          MPIU_Object_set_ref(sreq, 1);
          MPID_nem_mx_pending_send_req--;
       }
       else
       {
          sreq->status.cancelled = FALSE;
       }
       handled = TRUE;
    }
   
 fn_exit:
    return handled;
 fn_fail:
    goto fn_exit;
}