Пример #1
0
int mca_pml_ucx_init(void)
{
    ucs_status_t status;
    int rc;

    PML_UCX_VERBOSE(1, "mca_pml_ucx_init");

    /* TODO check MPI thread mode */
    status = ucp_worker_create(ompi_pml_ucx.ucp_context, UCS_THREAD_MODE_SINGLE,
                              &ompi_pml_ucx.ucp_worker);
    if (UCS_OK != status) {
        return OMPI_ERROR;
    }

    rc = mca_pml_ucx_send_worker_address();
    if (rc < 0) {
        return rc;
    }

    /* Initialize the free lists */
    OBJ_CONSTRUCT(&ompi_pml_ucx.persistent_reqs, mca_pml_ucx_freelist_t);
    OBJ_CONSTRUCT(&ompi_pml_ucx.convs,           mca_pml_ucx_freelist_t);

    /* Create a completed request to be returned from isend */
    OBJ_CONSTRUCT(&ompi_pml_ucx.completed_send_req, ompi_request_t);
    mca_pml_ucx_completed_request_init(&ompi_pml_ucx.completed_send_req);

    opal_progress_register(mca_pml_ucx_progress);

    PML_UCX_VERBOSE(2, "created ucp context %p, worker %p",
                    (void *)ompi_pml_ucx.ucp_context,
                    (void *)ompi_pml_ucx.ucp_worker);
    return OMPI_SUCCESS;
}
Пример #2
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    mca_coll_base_module_t *module;
    mca_coll_hcoll_module_t *hcoll_module;
    static bool libhcoll_initialized = false;
    *priority = 0;
    module = NULL;

    if (!mca_coll_hcoll_component.hcoll_enable){
        goto exit;
    }

    if (!libhcoll_initialized)
    {
        /* libhcoll should be initialized here since current implmentation of
           mxm bcol in libhcoll needs world_group fully functional during init

           world_group, i.e. ompi_comm_world, is not ready at hcoll component open
           call */
        opal_progress_register(hcoll_progress_fn);
        int rc = hcoll_init();

        if (HCOLL_SUCCESS != rc){
            mca_coll_hcoll_component.hcoll_enable = 0;
            opal_progress_unregister(hcoll_progress_fn);
            HCOL_VERBOSE(0,"Hcol library init failed");
            return NULL;
        }
        libhcoll_initialized = true;
    }
    hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);
    if (!hcoll_module){
        goto exit;
    }

    if (ompi_comm_size(comm) < 2 || OMPI_COMM_IS_INTER(comm)){
        goto exit;
    }



    hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable;
    hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL;
    hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL;
    hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL;
    hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL;
    hcoll_module->super.coll_alltoall = /*hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : */  NULL;
    hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL;
    hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL;
    hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL;
    hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL;

    *priority = mca_coll_hcoll_component.hcoll_priority;
    module = &hcoll_module->super;


exit:
    return module;
}
Пример #3
0
int ompi_mtl_mx_module_init(){ 
    mx_param_t mx_param;
    mx_return_t mx_return;
    int32_t nic, ep;
    
    /* setup params */
    mx_param.key = MX_PARAM_UNEXP_QUEUE_MAX;
    mx_param.val.unexp_queue_max = ompi_mtl_mx.mx_unexp_queue_max;
    
    /* get a local endpoint */
    nic = ompi_mtl_mx.mx_board_num;
    if (nic < 0) {
      nic = MX_ANY_NIC;
    }
    ep = ompi_mtl_mx.mx_endpoint_num;
    if (ep < 0) {
      ep = MX_ANY_ENDPOINT;
    }
    mx_return = mx_open_endpoint(nic,
                                 ep,
                                 ompi_mtl_mx.mx_filter, 
                                 NULL, 
                                 0,
                                 &ompi_mtl_mx.mx_endpoint);
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_open_endpoint (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    
    /* get the endpoint address */
    mx_return = mx_get_endpoint_addr( ompi_mtl_mx.mx_endpoint, 
                                      &ompi_mtl_mx.mx_endpoint_addr); 
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_get_endpoint_addr (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    
    mx_return = mx_decompose_endpoint_addr( ompi_mtl_mx.mx_endpoint_addr, &(ompi_mtl_mx.mx_addr.nic_id),
                                            &(ompi_mtl_mx.mx_addr.endpoint_id) );
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_decompose_endpoint_addr (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    opal_output_verbose(10, ompi_mtl_base_framework.framework_output, 
			"mtl:mx: local nic %d, endpoint %d, got nic %d, ep %d\n", nic, ep, 
            (int)ompi_mtl_mx.mx_addr.nic_id,
			ompi_mtl_mx.mx_addr.endpoint_id);

    ompi_modex_send( &mca_mtl_mx_component.super.mtl_version, 
                             &ompi_mtl_mx.mx_addr, 
                             sizeof(mca_mtl_mx_addr_t));
    
    /* register the mtl mx progress function */
    opal_progress_register(ompi_mtl_mx_progress);
    
    return OMPI_SUCCESS; 
}
Пример #4
0
int mca_pml_yalla_init(void)
{
    mxm_error_t error;
    int rc;

    PML_YALLA_VERBOSE(1, "mca_pml_yalla_init");

    if (ompi_pml_yalla.using_mem_hooks) {
        opal_mem_hooks_register_release(mca_pml_yalla_mem_release_cb, NULL);
    }

    error = mxm_ep_create(ompi_pml_yalla.mxm_context, ompi_pml_yalla.ep_opts,
                          &ompi_pml_yalla.mxm_ep);
    if (MXM_OK != error) {
        return OMPI_ERROR;
    }

    rc = send_ep_address();
    if (rc < 0) {
        return rc;
    }

    OBJ_CONSTRUCT(&ompi_pml_yalla.send_reqs, mca_pml_yalla_freelist_t);
    OBJ_CONSTRUCT(&ompi_pml_yalla.bsend_reqs, mca_pml_yalla_freelist_t);
    OBJ_CONSTRUCT(&ompi_pml_yalla.recv_reqs, mca_pml_yalla_freelist_t);
    OBJ_CONSTRUCT(&ompi_pml_yalla.convs, mca_pml_yalla_freelist_t);

    opal_progress_register(mca_pml_yalla_progress);

    PML_YALLA_VERBOSE(2, "created mxm context %p ep %p", (void *)ompi_pml_yalla.mxm_context,
                      (void *)ompi_pml_yalla.mxm_ep);
    return OMPI_SUCCESS;
}
Пример #5
0
int mca_pml_ob1_enable_progress(int32_t count)
{
    int32_t progress_count = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, count);
    if( 1 < progress_count )
        return 0;  /* progress was already on */

    opal_progress_register(mca_pml_ob1_progress);
    return 1;
}
Пример #6
0
int ompi_mtl_mx_module_init(){ 
    mx_param_t mx_param;
    mx_return_t mx_return;
    
    
    /* setup params */
    mx_param.key = MX_PARAM_UNEXP_QUEUE_MAX;
    mx_param.val.unexp_queue_max = ompi_mtl_mx.mx_unexp_queue_max;
    
   
    /* get a local endpoint */
    mx_return = mx_open_endpoint(MX_ANY_NIC, 
                                 MX_ANY_ENDPOINT,
                                 ompi_mtl_mx.mx_filter, 
                                 NULL, 
                                 0,
                                 &ompi_mtl_mx.mx_endpoint);
    
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_output, "Error in mx_open_endpoint (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    
    /* get the endpoint address */
    mx_return = mx_get_endpoint_addr( ompi_mtl_mx.mx_endpoint, 
                                      &ompi_mtl_mx.mx_endpoint_addr); 
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_output, "Error in mx_get_endpoint_addr (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    
    mx_return = mx_decompose_endpoint_addr( ompi_mtl_mx.mx_endpoint_addr, &(ompi_mtl_mx.mx_addr.nic_id),
                                            &(ompi_mtl_mx.mx_addr.endpoint_id) );
    
    if(mx_return != MX_SUCCESS) { 
        opal_output(ompi_mtl_base_output, "Error in mx_decompose_endpoint_addr (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }


    
    ompi_modex_send( &mca_mtl_mx_component.super.mtl_version, 
                             &ompi_mtl_mx.mx_addr, 
                             sizeof(mca_mtl_mx_addr_t));
    
    /* register the mtl mx progress function */
    opal_progress_register(ompi_mtl_mx_progress);
    
    
    return OMPI_SUCCESS; 
    
        
}
Пример #7
0
int NBC_Init_handle(struct ompi_communicator_t *comm, ompi_coll_libnbc_request_t **request, ompi_coll_libnbc_module_t *comminfo)
{
  int tmp_tag;
  bool need_register = false;
  ompi_coll_libnbc_request_t *handle;

  OMPI_COLL_LIBNBC_REQUEST_ALLOC(comm, handle);
  if (NULL == handle) return OMPI_ERR_OUT_OF_RESOURCE;
  *request = handle;

  handle->tmpbuf = NULL;
  handle->req_count = 0;
  handle->req_array = NULL;
  handle->comm = comm;
  handle->schedule = NULL;
  /* first int is the schedule size */
  handle->row_offset = sizeof(int);

  /******************** Do the tag and shadow comm administration ...  ***************/

  OPAL_THREAD_LOCK(&comminfo->mutex);
  tmp_tag = comminfo->tag--;
  if (tmp_tag == MCA_COLL_BASE_TAG_NONBLOCKING_END) {
      tmp_tag = comminfo->tag = MCA_COLL_BASE_TAG_NONBLOCKING_BASE;
      NBC_DEBUG(2,"resetting tags ...\n"); 
  }

  if (true != comminfo->comm_registered) {
      comminfo->comm_registered = true;
      need_register = true;
  }
  OPAL_THREAD_UNLOCK(&comminfo->mutex);

  handle->tag=comminfo->tag;

  /* register progress */
  if (need_register) {
      int32_t tmp = 
          OPAL_THREAD_ADD32(&mca_coll_libnbc_component.active_comms, 1);
      if (tmp == 1) {
          opal_progress_register(ompi_coll_libnbc_progress);
      }
  }

  handle->comm=comm;
  /*printf("got comminfo: %lu tag: %i\n", comminfo, comminfo->tag);*/
  
  /******************** end of tag and shadow comm administration ...  ***************/
  handle->comminfo = comminfo;
  
  NBC_DEBUG(3, "got tag %i\n", handle->tag);

  return NBC_OK;
}
/*
 * Open the component
 */
static int iboffload_open(void)
{
    int rc;

    /* local variables */
    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;

    IBOFFLOAD_VERBOSE(10, ("Open Iboffload component.\n"));

    cm->super.priority = 100;
    cm->super.n_net_contexts = 0;
    cm->super.network_contexts = NULL;

    OBJ_CONSTRUCT(&cm->recv_wrs.lock, opal_mutex_t);

    /* construct lists */
    OBJ_CONSTRUCT(&cm->devices, opal_pointer_array_t);
    rc = opal_pointer_array_init(&cm->devices, 10, INT_MAX, 10);
    if (OMPI_SUCCESS != rc) {
        goto close_device;
    }

    /* load mca parametres */
    rc = mca_bcol_iboffload_register_params();
    if (OMPI_SUCCESS != rc) {
        goto close_device;
    }

    /* Register the progress function */
    rc = opal_progress_register(mca_bcol_iboffload_component_progress);
    if (OMPI_SUCCESS != rc) {
        IBOFFLOAD_ERROR(("Failed to register the progress function"
                         " for iboffload component.\n"));
        goto close_device;
    }

    map_ompi_to_ib_dtype();
    map_ompi_to_ib_op_type();

    /* The init_done set to true on first component usage */
    cm->init_done = false;

    return OMPI_SUCCESS;

close_device:
    OBJ_DESTRUCT(&cm->devices);
    OBJ_DESTRUCT(&cm->recv_wrs.lock);
    return rc;
}
static int hcoll_open(void)
{
    int rc;

    mca_coll_hcoll_output = opal_output_open(NULL);
    opal_output_set_verbosity(mca_coll_hcoll_output, mca_coll_hcoll_component.hcoll_verbose);

    hcoll_rte_fns_setup();

    opal_progress_register(hcoll_progress_fn);
    rc = hcoll_init();

    if (HCOLL_SUCCESS != rc){
        opal_progress_unregister(hcoll_progress_fn);
        HCOL_VERBOSE(1,"Hcol library init failed");
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
Пример #10
0
int memheap_oob_init(mca_memheap_map_t *map)
{
    int rc = OSHMEM_SUCCESS;
    int i;
    oob_comm_request_t *r;

    memheap_map = map;

    OBJ_CONSTRUCT(&memheap_oob.lck, opal_mutex_t);
    OBJ_CONSTRUCT(&memheap_oob.cond, opal_condition_t);
    OBJ_CONSTRUCT(&memheap_oob.req_list, opal_list_t);


    for (i = 0; i < MEMHEAP_RECV_REQS_MAX; i++) {
        r = &memheap_oob.req_pool[i];
        rc = PMPI_Recv_init(r->buf, sizeof(r->buf), MPI_BYTE,
                MPI_ANY_SOURCE, 0,
                oshmem_comm_world,
                &r->recv_req);
        if (MPI_SUCCESS != rc) {
            MEMHEAP_ERROR("Failed to created recv request %d", rc);
            return rc;
        }

        rc = PMPI_Start(&r->recv_req);
        if (MPI_SUCCESS != rc) {
            MEMHEAP_ERROR("Failed to post recv request %d", rc);
            return rc;
        }
        opal_list_append(&memheap_oob.req_list, &r->super);
    }

    opal_progress_register(oshmem_mkey_recv_cb);
    memheap_oob.is_inited = 1;

    return rc;
}
Пример #11
0
int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
{
    spml_ikrit_mxm_ep_conn_info_t *ep_info = NULL;
    spml_ikrit_mxm_ep_conn_info_t *ep_hw_rdma_info = NULL;
    spml_ikrit_mxm_ep_conn_info_t my_ep_info = {{0}};
#if MXM_API < MXM_VERSION(2,0)
    mxm_conn_req_t *conn_reqs;
    int timeout;
#else
    size_t mxm_addr_len = MXM_MAX_ADDR_LEN;
#endif
    mxm_error_t err;
    size_t i, n;
    int rc = OSHMEM_ERROR;
    ompi_proc_t *proc_self;
    int my_rank = oshmem_my_proc_id();

    OBJ_CONSTRUCT(&mca_spml_ikrit.active_peers, opal_list_t);
    /* Allocate connection requests */
#if MXM_API < MXM_VERSION(2,0)
    conn_reqs = malloc(nprocs * sizeof(mxm_conn_req_t));
    if (NULL == conn_reqs) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto bail;
    }
    memset(conn_reqs, 0x0, sizeof(mxm_conn_req_t));
#endif
    ep_info = calloc(sizeof(spml_ikrit_mxm_ep_conn_info_t), nprocs);
    if (NULL == ep_info) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto bail;
    }

    if (mca_spml_ikrit.hw_rdma_channel) {
        ep_hw_rdma_info = calloc(sizeof(spml_ikrit_mxm_ep_conn_info_t), nprocs);
        if (NULL == ep_hw_rdma_info) {
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            goto bail;
        }
    }

    mca_spml_ikrit.mxm_peers = (mxm_peer_t **) malloc(nprocs
            * sizeof(*(mca_spml_ikrit.mxm_peers)));
    if (NULL == mca_spml_ikrit.mxm_peers) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto bail;
    }

#if MXM_API < MXM_VERSION(2,0)
    if (OSHMEM_SUCCESS
            != spml_ikrit_get_ep_address(&my_ep_info, MXM_PTL_SELF)) {
        rc = OSHMEM_ERROR;
        goto bail;
    }
    if (OSHMEM_SUCCESS
            != spml_ikrit_get_ep_address(&my_ep_info, MXM_PTL_RDMA)) {
        rc = OSHMEM_ERROR;
        goto bail;
    }
#else
    if (mca_spml_ikrit.hw_rdma_channel) {
        err = mxm_ep_get_address(mca_spml_ikrit.mxm_hw_rdma_ep, &my_ep_info.addr.ep_addr, &mxm_addr_len);
        if (MXM_OK != err) {
            orte_show_help("help-oshmem-spml-ikrit.txt", "unable to get endpoint address", true,
                    mxm_error_string(err));
            rc = OSHMEM_ERROR;
            goto bail;
        }
        oshmem_shmem_allgather(&my_ep_info, ep_hw_rdma_info,
                sizeof(spml_ikrit_mxm_ep_conn_info_t));
    }
    err = mxm_ep_get_address(mca_spml_ikrit.mxm_ep, &my_ep_info.addr.ep_addr, &mxm_addr_len);
    if (MXM_OK != err) {
        orte_show_help("help-oshmem-spml-ikrit.txt", "unable to get endpoint address", true,
                mxm_error_string(err));
        rc = OSHMEM_ERROR;
        goto bail;
    }
#endif
    oshmem_shmem_allgather(&my_ep_info, ep_info,
                           sizeof(spml_ikrit_mxm_ep_conn_info_t));

    opal_progress_register(spml_ikrit_progress);

    /* Get the EP connection requests for all the processes from modex */
    for (n = 0; n < nprocs; ++n) {

        /* mxm 2.0 keeps its connections on a list. Make sure
         * that list have different order on every rank */
        i = (my_rank + n) % nprocs;
        mca_spml_ikrit.mxm_peers[i] = OBJ_NEW(mxm_peer_t);
        if (NULL == mca_spml_ikrit.mxm_peers[i]) {
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            goto bail;
        }
        mca_spml_ikrit.mxm_peers[i]->pe = i;

#if MXM_API < MXM_VERSION(2,0)
        conn_reqs[i].ptl_addr[MXM_PTL_SELF] =
                (struct sockaddr *) &ep_info[i].addr.ptl_addr[MXM_PTL_SELF];
        conn_reqs[i].ptl_addr[MXM_PTL_SHM] = NULL;
        conn_reqs[i].ptl_addr[MXM_PTL_RDMA] =
                (struct sockaddr *) &ep_info[i].addr.ptl_addr[MXM_PTL_RDMA];
#else
        err = mxm_ep_connect(mca_spml_ikrit.mxm_ep, ep_info[i].addr.ep_addr, &mca_spml_ikrit.mxm_peers[i]->mxm_conn);
        if (MXM_OK != err) {
            SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
            goto bail;
        }
        if (OSHMEM_SUCCESS != create_ptl_idx(i))
                goto bail;
        mxm_conn_ctx_set(mca_spml_ikrit.mxm_peers[i]->mxm_conn, mca_spml_ikrit.mxm_peers[i]);
        if (mca_spml_ikrit.hw_rdma_channel) {
            err = mxm_ep_connect(mca_spml_ikrit.mxm_hw_rdma_ep, ep_hw_rdma_info[i].addr.ep_addr, &mca_spml_ikrit.mxm_peers[i]->mxm_hw_rdma_conn);
            if (MXM_OK != err) {
                SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
                goto bail;
            }
        } else {
            mca_spml_ikrit.mxm_peers[i]->mxm_hw_rdma_conn = mca_spml_ikrit.mxm_peers[i]->mxm_conn;
        }
#endif
    }

#if MXM_API < MXM_VERSION(2,0)
    /* Connect to remote peers */
    if (mxm_get_version() < MXM_VERSION(1,5)) {
        timeout = 1000;
    } else {
        timeout = -1;
    }
    err = mxm_ep_connect(mca_spml_ikrit.mxm_ep, conn_reqs, nprocs, timeout);
    if (MXM_OK != err) {
        SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
        for (i = 0; i < nprocs; ++i) {
            if (MXM_OK != conn_reqs[i].error) {
                SPML_ERROR("MXM EP connect to %s error: %s\n",
                           procs[i]->proc_hostname, mxm_error_string(conn_reqs[i].error));
            }
        }
        rc = OSHMEM_ERR_CONNECTION_FAILED;
        goto bail;
    }

    /* Save returned connections */
    for (i = 0; i < nprocs; ++i) {
        mca_spml_ikrit.mxm_peers[i]->mxm_conn = conn_reqs[i].conn;
        if (OSHMEM_SUCCESS != create_ptl_idx(i)) {
            rc = OSHMEM_ERR_CONNECTION_FAILED;
            goto bail;
        }

        mxm_conn_ctx_set(conn_reqs[i].conn, mca_spml_ikrit.mxm_peers[i]);
    }

    if (conn_reqs)
        free(conn_reqs);
#endif
    if (ep_info)
        free(ep_info);
    if (ep_hw_rdma_info)
        free(ep_hw_rdma_info);

#if MXM_API >= MXM_VERSION(2,0)
    if (mca_spml_ikrit.bulk_connect) {
        /* Need a barrier to ensure remote peers already created connection */
        oshmem_shmem_barrier();
        mxm_ep_wireup(mca_spml_ikrit.mxm_ep);
    }
#endif

    proc_self = oshmem_proc_group_find(oshmem_group_all, my_rank);
    /* identify local processes and change transport to SHM */
    for (i = 0; i < nprocs; i++) {
        if (procs[i]->super.proc_name.jobid != proc_self->super.proc_name.jobid ||
            !OPAL_PROC_ON_LOCAL_NODE(procs[i]->super.proc_flags)) {
            continue;
        }
        if (procs[i] == proc_self)
            continue;

        /* use zcopy for put/get via sysv shared memory */
        OSHMEM_PROC_DATA(procs[i])->transport_ids[0] = MXM_PTL_SHM;
        OSHMEM_PROC_DATA(procs[i])->transport_ids[1] = MXM_PTL_RDMA;
        OSHMEM_PROC_DATA(procs[i])->num_transports = 2;
    }

    SPML_VERBOSE(50, "*** ADDED PROCS ***");
    return OSHMEM_SUCCESS;

bail:
#if MXM_API < MXM_VERSION(2,0)
	if (conn_reqs)
		free(conn_reqs);
#endif
	if (ep_info)
		free(ep_info);
	if (ep_hw_rdma_info)
		free(ep_hw_rdma_info);
    SPML_ERROR("add procs FAILED rc=%d", rc);

    return rc;

}
Пример #12
0
static int mca_bml_r2_add_procs( size_t nprocs, 
                                 struct ompi_proc_t** procs, 
                                 struct opal_bitmap_t* reachable )
{
    size_t p, p_index, n_new_procs = 0;
    struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;  
    struct ompi_proc_t** new_procs = NULL; 
    struct ompi_proc_t *unreach_proc = NULL;
    int rc, ret = OMPI_SUCCESS;

    if(0 == nprocs) {
        return OMPI_SUCCESS;
    }
    
    if(OMPI_SUCCESS != (rc = mca_bml_r2_add_btls()) ) {
        return rc;
    }
    
    /* Select only the procs that don't yet have the BML proc struct. This prevent
     * us from calling btl->add_procs several this on the same destination proc.
     */
    for(p_index = 0; p_index < nprocs; p_index++) { 
        struct ompi_proc_t* proc = procs[p_index]; 

        OBJ_RETAIN(proc); 
        if(NULL !=  proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { 
            continue;  /* go to the next proc */
        }
        /* Allocate the new_procs on demand */
        if( NULL == new_procs ) {
            new_procs = (struct ompi_proc_t **)malloc(nprocs * sizeof(struct ompi_proc_t *));
            if( NULL == new_procs ) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
        }
        new_procs[n_new_procs++] = proc; 
    }

    if ( 0 == n_new_procs ) {
        return OMPI_SUCCESS;
    }

    /* Starting from here we only work on the unregistered procs */
    procs = new_procs; 
    nprocs = n_new_procs; 
    
    /* attempt to add all procs to each r2 */
    btl_endpoints = (struct mca_btl_base_endpoint_t **) 
        malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*)); 
    if (NULL == btl_endpoints) {
        free(new_procs);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; p_index++) {
        mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index];
        int btl_inuse = 0;

        /* if the r2 can reach the destination proc it sets the
         * corresponding bit (proc index) in the reachable bitmap
         * and can return addressing information for each proc
         * that is passed back to the r2 on data transfer calls
         */
        opal_bitmap_clear_all_bits(reachable);
        memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*)); 

        rc = btl->btl_add_procs(btl, n_new_procs, new_procs, btl_endpoints, reachable);
        if(OMPI_SUCCESS != rc) {
            /* This BTL has troubles adding the nodes. Let's continue maybe some other BTL
             * can take care of this task.
             */
            continue;
        }

        /* for each proc that is reachable */
        for( p = 0; p < n_new_procs; p++ ) {
            if(opal_bitmap_is_set_bit(reachable, p)) {
                ompi_proc_t *proc = new_procs[p]; 
                mca_bml_base_endpoint_t * bml_endpoint = 
                    (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; 
                mca_bml_base_btl_t* bml_btl; 
                size_t size;
                
                if(NULL == bml_endpoint) { 
                    /* allocate bml specific proc data */
                    bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
                    if (NULL == bml_endpoint) {
                        opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
                        free(btl_endpoints);
                        free(new_procs);
                        return OMPI_ERR_OUT_OF_RESOURCE;
                    }
                    
                    /* preallocate space in array for max number of r2s */
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules);
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send,  mca_bml_r2.num_btl_modules);
                    mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma,  mca_bml_r2.num_btl_modules);
                    bml_endpoint->btl_max_send_size = -1;
                    bml_endpoint->btl_proc = proc;
                    proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint; 
                 
                    bml_endpoint->btl_flags_or = 0;
                }

                /* dont allow an additional BTL with a lower exclusivity ranking */
                size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
                if(size > 0) {
                    bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, size-1);
                    /* skip this btl if the exclusivity is less than the previous */
                    if(bml_btl->btl->btl_exclusivity > btl->btl_exclusivity) {
                        btl->btl_del_procs(btl, 1, &proc, &btl_endpoints[p]);
                        continue;
                    }
                }

                /* cache the endpoint on the proc */
                bml_btl = mca_bml_base_btl_array_insert(&bml_endpoint->btl_send);
                bml_btl->btl = btl;
                bml_btl->btl_endpoint = btl_endpoints[p];
                bml_btl->btl_weight = 0;
                bml_btl->btl_flags = btl->btl_flags; 
                if( (bml_btl->btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put) ) {
                    opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
                                " the %s BTL without any PUT function attached. Disard the flag !",
                                bml_btl->btl->btl_component->btl_version.mca_component_name);
                    bml_btl->btl_flags ^= MCA_BTL_FLAGS_PUT;
                }
                if( (bml_btl->btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get) ) {
                    opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
                                " the %s BTL without any GET function attached. Discard the flag !",
                                bml_btl->btl->btl_component->btl_version.mca_component_name);
                    bml_btl->btl_flags ^= MCA_BTL_FLAGS_GET;
                }
                if( (bml_btl->btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0 ) {
                    /**
                     * If no protocol specified, we have 2 choices: we ignore the BTL
                     * as we don't know which protocl to use, or we suppose that all
                     * BTLs support the send protocol. 
                     */
                    bml_btl->btl_flags |= MCA_BTL_FLAGS_SEND;
                }
                /**
                 * calculate the bitwise OR of the btl flags 
                 */
                bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
                /* This BTL is in use, allow the progress registration */
                btl_inuse++;
            }
        }
        if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
            size_t p;
            bool found = false;
            for( p = 0; p < mca_bml_r2.num_btl_progress; p++ ) {
                if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
                    found = true;
                    break;
                }
            }
            if(found == false) {
                mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress] = 
                    btl->btl_component->btl_progress;
                mca_bml_r2.num_btl_progress++;
                opal_progress_register( btl->btl_component->btl_progress );
            }
        }
    }
    free(btl_endpoints);

    /* iterate back through procs and compute metrics for registered r2s */
    for(p=0; p<n_new_procs; p++) {
        ompi_proc_t *proc = new_procs[p];
        mca_bml_base_endpoint_t* bml_endpoint = 
            (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
        double total_bandwidth = 0;
        uint32_t latency = 0xffffffff;
        size_t n_index;
        size_t n_size;

        /* skip over procs w/ no btl's registered */
        if(NULL == bml_endpoint) {
            continue;
        }

        /* (1) determine the total bandwidth available across all btls
         *     note that we need to do this here, as we may already have btls configured
         * (2) determine the highest priority ranking for latency
         * (3) compute the maximum amount of bytes that can be send without any
         *     weighting. Once the left over is smaller than this number we will
         *     start using the weight to compute the correct amount.
         */
        n_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); 
        
        /* sort BTLs in descending order according to bandwidth value */
        qsort(bml_endpoint->btl_send.bml_btls, n_size,
                sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);

        bml_endpoint->btl_rdma_index = 0;
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_bml_base_btl_t* bml_btl = 
                mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
            mca_btl_base_module_t* btl = bml_btl->btl;
            total_bandwidth += bml_btl->btl->btl_bandwidth;
            if(btl->btl_latency < latency) {
                latency = btl->btl_latency;
            }
        }
        
        /* (1) set the weight of each btl as a percentage of overall bandwidth
         * (2) copy all btl instances at the highest priority ranking into the
         *     list of btls used for first fragments
         */
        for(n_index = 0; n_index < n_size; n_index++) {
            mca_bml_base_btl_t* bml_btl = 
                mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
            mca_btl_base_module_t *btl = bml_btl->btl;

            /* compute weighting factor for this r2 */
            if(btl->btl_bandwidth > 0) {
                bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth);
            } else {
                bml_btl->btl_weight = (float)(1.0 / n_size);
            }

            /* check to see if this r2 is already in the array of r2s 
             * used for first fragments - if not add it.
             */
            if(btl->btl_latency == latency) {
                mca_bml_base_btl_t* bml_btl_new = 
                    mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
                *bml_btl_new = *bml_btl;
            }

            /* set endpoint max send size as min of available btls */
            if(bml_endpoint->btl_max_send_size > btl->btl_max_send_size)
               bml_endpoint->btl_max_send_size = btl->btl_max_send_size;

            /* check flags - is rdma prefered */
            if ((btl->btl_flags & (MCA_BTL_FLAGS_PUT|MCA_BTL_FLAGS_GET)) &&
                !((proc->proc_arch != ompi_proc_local_proc->proc_arch) &&
                  (0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
                mca_bml_base_btl_t* bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
                mca_btl_base_module_t* btl_rdma = bml_btl->btl;

                *bml_btl_rdma = *bml_btl;
                if(bml_endpoint->btl_pipeline_send_length < btl_rdma->btl_rdma_pipeline_send_length) {
                    bml_endpoint->btl_pipeline_send_length = btl_rdma->btl_rdma_pipeline_send_length;
                }
                if(bml_endpoint->btl_send_limit < btl_rdma->btl_min_rdma_pipeline_size) {
                    bml_endpoint->btl_send_limit = btl_rdma->btl_min_rdma_pipeline_size;
                }
            }
        }
    }

    /* see if we have a connection to everyone else */
    for(p=0; p<n_new_procs; p++) {
        ompi_proc_t *proc = new_procs[p];

        if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
            if (NULL == unreach_proc) {
                unreach_proc = proc;
            }
            ret = OMPI_ERR_UNREACH;
        }
    }

    if (mca_bml_r2.show_unreach_errors && 
        OMPI_ERR_UNREACH == ret) {
        opal_show_help("help-mca-bml-r2.txt",
                       "unreachable proc",
                       true, 
                       OMPI_NAME_PRINT(&(ompi_proc_local_proc->proc_name)),
                       (NULL != ompi_proc_local_proc->proc_hostname ?
                        ompi_proc_local_proc->proc_hostname : "unknown!"),
                       OMPI_NAME_PRINT(&(unreach_proc->proc_name)),
                       (NULL != ompi_proc_local_proc->proc_hostname ?
                        ompi_proc_local_proc->proc_hostname : "unknown!"),
                       btl_names);
    }

    free(new_procs); 

    return ret;
}
Пример #13
0
int
ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
                            size_t nprocs,
                            struct ompi_proc_t** procs)
{
    int ret, me;
    size_t i;
    bool new_found = false;
    ptl_process_t *maptable;

    if (ompi_mtl_portals4.use_logical) {
        maptable = malloc(sizeof(ptl_process_t) * nprocs);
        if (NULL == maptable) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: malloc failed\n",
                                __FILE__, __LINE__);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /* Get the list of ptl_process_id_t from the runtime and copy into structure */
    for (i = 0 ; i < nprocs ; ++i) {
        ptl_process_t *modex_id;
        size_t size;

        if( procs[i] == ompi_proc_local_proc ) {
            me = i;
        }

        if (procs[i]->super.proc_arch != ompi_proc_local()->super.proc_arch) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "Portals 4 MTL does not support heterogeneous operations.");
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "Proc %s architecture %x, mine %x.",
                                OMPI_NAME_PRINT(&procs[i]->super.proc_name),
                                procs[i]->super.proc_arch, ompi_proc_local()->super.proc_arch);
            return OMPI_ERR_NOT_SUPPORTED;
        }

        OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version,
                        &procs[i]->super.proc_name, (uint8_t**)&modex_id, &size);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_modex_recv failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        } else if (sizeof(ptl_process_t) != size) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_modex_recv failed: %d\n",
                                __FILE__, __LINE__, ret);
            return OMPI_ERR_BAD_PARAM;
        }

        if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) {
            ptl_process_t *peer_id;
            peer_id = malloc(sizeof(ptl_process_t));
            if (NULL == peer_id) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: malloc failed: %d\n",
                                    __FILE__, __LINE__, ret);
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            if (ompi_mtl_portals4.use_logical) {
                peer_id->rank = i;
                maptable[i].phys.pid = modex_id->phys.pid;
                maptable[i].phys.nid = modex_id->phys.nid;
                opal_output_verbose(50, ompi_mtl_base_framework.framework_output,
                    "logical: global rank=%d pid=%d nid=%d\n",
                    (int)i, maptable[i].phys.pid, maptable[i].phys.nid);
            } else {
                *peer_id = *modex_id;
            }

            procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id;

            new_found = true;
        } else {
            ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
            if (ompi_mtl_portals4.use_logical) {
                if ((size_t)proc->rank != i) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: existing peer and rank don't match\n",
                                    __FILE__, __LINE__);
                    return OMPI_ERROR;
                }
                maptable[i].phys.pid = modex_id->phys.pid;
                maptable[i].phys.nid = modex_id->phys.nid;
            }
            else if (proc->phys.nid != modex_id->phys.nid ||
                     proc->phys.pid != modex_id->phys.pid) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: existing peer and modex peer don't match\n",
                                    __FILE__, __LINE__);
                return OMPI_ERROR;
            }
        }
    }

    if (ompi_mtl_portals4.use_logical) {
        ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: logical mapping failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        }
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "logical mapping OK\n");
        free(maptable);
    }

    portals4_init_interface();

    /* activate progress callback */
    ret = opal_progress_register(ompi_mtl_portals4_progress);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: opal_progress_register failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    if (new_found) {
        ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: flowctl_add_procs failed: %d\n",
                                __FILE__, __LINE__, ret);
            return ret;
        }
    }
#endif

    return OMPI_SUCCESS;
}
Пример #14
0
/*
    /!\ Called for each processes /!\
 */
static int
portals4_init_query(bool enable_progress_threads,
        bool enable_mpi_threads)
{
    int ret;
    ptl_md_t md;
    ptl_me_t me;

    /* Initialize Portals and create a physical, matching interface */
    ret = PtlInit();
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlInit failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlNIInit(PTL_IFACE_DEFAULT,
            PTL_NI_PHYSICAL | PTL_NI_MATCHING,
            PTL_PID_ANY,
            NULL,
            &mca_coll_portals4_component.ni_limits,
            &mca_coll_portals4_component.ni_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlNIInit failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }


    ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlGetid failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    /* FIX ME: Need to make sure our ID matches with the MTL... */
    ret = PtlGetUid(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.uid);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlGetUid failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlEQAlloc(mca_coll_portals4_component.ni_h,
            MCA_COLL_PORTALS4_EQ_SIZE,
            &mca_coll_portals4_component.eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlEQAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
            0,
            mca_coll_portals4_component.eq_h,
            REQ_COLL_TABLE_ID,
            &mca_coll_portals4_component.pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    if (mca_coll_portals4_component.pt_idx != REQ_COLL_TABLE_ID) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
                __FILE__, __LINE__,
                mca_coll_portals4_component.finish_pt_idx);
        return OMPI_ERROR;
    }

    ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
            0,
            mca_coll_portals4_component.eq_h,
            REQ_COLL_FINISH_TABLE_ID,
            &mca_coll_portals4_component.finish_pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    if (mca_coll_portals4_component.finish_pt_idx != REQ_COLL_FINISH_TABLE_ID) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
                __FILE__, __LINE__,
                mca_coll_portals4_component.finish_pt_idx);
        return OMPI_ERROR;
    }

    /* Bind MD/MDs across all memory.  We prefer (for obvious reasons)
       to have a single MD across all of memory */
    memset(&md, 0, sizeof(ptl_md_t));
    md.start = 0;
    md.length = 0;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
            &md,
            &mca_coll_portals4_component.zero_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMDBind failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
            &md,
            &mca_coll_portals4_component.data_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMDBind failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%x\n", md.start, md.length));

    /* setup finish ack ME */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT |
            PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = 0;
    me.ignore_bits = 0;

    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
            mca_coll_portals4_component.finish_pt_idx,
            &me,
            PTL_PRIORITY_LIST,
            NULL,
            &mca_coll_portals4_component.finish_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* This ME is used for RTR exchange only */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT |
            PTL_ME_EVENT_SUCCESS_DISABLE | PTL_ME_EVENT_OVER_DISABLE |
            PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;

    /* Note : the RTR bit must be set to match this ME,
     * this allows to discriminate the RTR from data flow
     * (especially for the Barrier operations)
     */
    COLL_PORTALS4_SET_BITS(me.match_bits, 0, 0, 1, 0, 0, 0);
    me.ignore_bits = ~COLL_PORTALS4_RTR_MASK;

    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
            mca_coll_portals4_component.pt_idx,
            &me,
            PTL_OVERFLOW_LIST,
            NULL,
            &mca_coll_portals4_component.unex_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* activate progress callback */
    ret = opal_progress_register(portals4_progress);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: opal_progress_register failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;

    }
    return OMPI_SUCCESS;

}
Пример #15
0
int mca_pml_yalla_init(void)
{
    mxm_context_opts_t *ctx_opts;
    mxm_ep_opts_t *ep_opts;
    mxm_error_t error;
    int rc;

    PML_YALLA_VERBOSE(1, "mca_pml_yalla_init");

    /* Set memory hooks */
    if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
        ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) &
         opal_mem_hooks_support_level()))
    {
        PML_YALLA_VERBOSE(1, "enabling on-demand memory mapping");
        opal_setenv("MXM_PML_MEM_ON_DEMAND_MAP", "y", false, &environ);
        ompi_pml_yalla.using_mem_hooks = 1;
    } else {
        PML_YALLA_VERBOSE(1, "disabling on-demand memory mapping");
        ompi_pml_yalla.using_mem_hooks = 0;
    }
    opal_setenv("MXM_PML_SINGLE_THREAD", ompi_mpi_thread_multiple ? "n" : "y",
                false, &environ);

    /* Read options */
    error = mxm_config_read_opts(&ctx_opts, &ep_opts, "PML", NULL, 0);
    if (MXM_OK != error) {
        return OMPI_ERROR;
    }

    error = mxm_init(ctx_opts, &ompi_pml_yalla.mxm_context);
    if (MXM_OK != error) {
        return OMPI_ERROR;
    }

    if (ompi_pml_yalla.using_mem_hooks) {
        opal_mem_hooks_register_release(mca_pml_yalla_mem_release_cb, NULL);
    }

    error = mxm_ep_create(ompi_pml_yalla.mxm_context, ep_opts, &ompi_pml_yalla.mxm_ep);
    if (MXM_OK != error) {
        return OMPI_ERROR;
    }

    mxm_config_free_context_opts(ctx_opts);
    mxm_config_free_ep_opts(ep_opts);

    rc = send_ep_address();
    if (rc < 0) {
        return rc;
    }

    OBJ_CONSTRUCT(&ompi_pml_yalla.send_reqs, mca_pml_yalla_freelist_t);
    OBJ_CONSTRUCT(&ompi_pml_yalla.bsend_reqs, mca_pml_yalla_freelist_t);
    OBJ_CONSTRUCT(&ompi_pml_yalla.recv_reqs, mca_pml_yalla_freelist_t);
    OBJ_CONSTRUCT(&ompi_pml_yalla.convs, mca_pml_yalla_freelist_t);

    opal_progress_register(mca_pml_yalla_progress);

    PML_YALLA_VERBOSE(2, "created mxm context %p ep %p", ompi_pml_yalla.mxm_context,
                      ompi_pml_yalla.mxm_ep);
    return OMPI_SUCCESS;
}
/*
 * Open the component
 */
static int basesmuma_open(void)
{

    /* local variables */
    mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
    int ret = OMPI_SUCCESS;
    opal_mutex_t *mutex_ptr;
    int dummy;

    /*
     * Make sure that the number of banks is a power of 2
     */
    cs->basesmuma_num_mem_banks=
        roundup_to_power_radix(2,cs->basesmuma_num_mem_banks, &dummy);
    if ( 0 == cs->basesmuma_num_mem_banks ) {
        ret=OMPI_ERROR;
        goto ERROR;
    }
    
    /*
     * Make sure that the the number of buffers is a power of 2
     */
    cs->basesmuma_num_regions_per_bank=
        roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank, &dummy);
    if ( 0 == cs->basesmuma_num_regions_per_bank ) {
        ret=OMPI_ERROR;
        goto ERROR;
    }

	/* Portals initialization */
	cs->portals_init = false;
	cs->portals_info = NULL;

    /*
     * initialization
     */
    cs->sm_ctl_structs=NULL;
    OBJ_CONSTRUCT(&(cs->sm_connections_list),opal_list_t);
    OBJ_CONSTRUCT(&(cs->nb_admin_barriers),opal_list_t);
    mutex_ptr= &(cs->nb_admin_barriers_mutex);
    OBJ_CONSTRUCT(mutex_ptr, opal_mutex_t);

	/* Control structures object construct 
	 */
     OBJ_CONSTRUCT(&(cs->ctl_structures), opal_list_t);
	
    /* shared memory has not been registered yet */
    cs->mpool_inited = false;

    /* initialize base file names */
    cs->clt_base_fname="sm_ctl_mem_";
    cs->payload_base_fname="sm_payload_mem_";

    /* initialize the size of the shared memory scartch region */
    cs->my_scratch_shared_memory_size=getpagesize();
    cs->my_scratch_shared_memory=NULL;
    cs->scratch_offset_from_base_ctl_file=0;

    /*
     * register the progess function
     */
    ret=opal_progress_register(bcol_basesmuma_progress);
    if (MPI_SUCCESS != ret) {
        opal_output(0, "failed to register the progress function\n");
    }

    return ret;

ERROR:
    return ret;
}
Пример #17
0
int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx)
{
    mca_spml_ucx_ctx_t *ucx_ctx;
    ucp_worker_params_t params;
    ucp_ep_params_t ep_params;
    size_t i, j, nprocs = oshmem_num_procs();
    ucs_status_t err;
    int my_pe = oshmem_my_proc_id();
    size_t len;
    spml_ucx_mkey_t *ucx_mkey;
    sshmem_mkey_t *mkey;
    int rc = OSHMEM_ERROR;

    ucx_ctx = malloc(sizeof(mca_spml_ucx_ctx_t));
    ucx_ctx->options = options;

    params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE || options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED) {
        params.thread_mode = UCS_THREAD_MODE_SINGLE;
    } else {
        params.thread_mode = UCS_THREAD_MODE_MULTI;
    }

    err = ucp_worker_create(mca_spml_ucx.ucp_context, &params,
                            &ucx_ctx->ucp_worker);
    if (UCS_OK != err) {
        free(ucx_ctx);
        return OSHMEM_ERROR;
    }

    ucx_ctx->ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(ucx_ctx->ucp_peers)));
    if (NULL == ucx_ctx->ucp_peers) {
        goto error;
    }

    if (mca_spml_ucx.active_array.ctxs_count == 0) {
        opal_progress_register(spml_ucx_ctx_progress);
    }

    for (i = 0; i < nprocs; i++) {
        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address    = (ucp_address_t *)(mca_spml_ucx.remote_addrs_tbl[i]);
        err = ucp_ep_create(ucx_ctx->ucp_worker, &ep_params,
                            &ucx_ctx->ucp_peers[i].ucp_conn);
        if (UCS_OK != err) {
            SPML_ERROR("ucp_ep_create(proc=%d/%d) failed: %s", i, nprocs,
                       ucs_status_string(err));
            goto error2;
        }

        for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) {
            mkey = &memheap_map->mem_segs[j].mkeys_cache[i][0];
            ucx_mkey = &ucx_ctx->ucp_peers[i].mkeys[j].key;
            err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[i].ucp_conn,
                                     mkey->u.data,
                                     &ucx_mkey->rkey);
            if (UCS_OK != err) {
                SPML_UCX_ERROR("failed to unpack rkey");
                goto error2;
            }
            mca_spml_ucx_cache_mkey(ucx_ctx, mkey, j, i);
        }
    }

    SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex);
    _ctx_add(&mca_spml_ucx.active_array, ucx_ctx);
    SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex);

    (*ctx) = (shmem_ctx_t)ucx_ctx;
    return OSHMEM_SUCCESS;

 error2:
    for (i = 0; i < nprocs; i++) {
        if (ucx_ctx->ucp_peers[i].ucp_conn) {
            ucp_ep_destroy(ucx_ctx->ucp_peers[i].ucp_conn);
        }
    }

    if (ucx_ctx->ucp_peers)
        free(ucx_ctx->ucp_peers);

 error:
    ucp_worker_destroy(ucx_ctx->ucp_worker);
    free(ucx_ctx);
    rc = OSHMEM_ERR_OUT_OF_RESOURCE;
    SPML_ERROR("ctx create FAILED rc=%d", rc);
    return rc;
}
Пример #18
0
int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs)
{
    size_t i, j, n;
    int rc = OSHMEM_ERROR;
    int my_rank = oshmem_my_proc_id();
    ucs_status_t err;
    ucp_address_t *wk_local_addr;
    size_t wk_addr_len;
    int *wk_roffs = NULL;
    int *wk_rsizes = NULL;
    char *wk_raddrs = NULL;
    ucp_ep_params_t ep_params;


    mca_spml_ucx_ctx_default.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(mca_spml_ucx_ctx_default.ucp_peers)));
    if (NULL == mca_spml_ucx_ctx_default.ucp_peers) {
        goto error;
    }

    err = ucp_worker_get_address(mca_spml_ucx_ctx_default.ucp_worker, &wk_local_addr, &wk_addr_len);
    if (err != UCS_OK) {
        goto error;
    }
    dump_address(my_rank, (char *)wk_local_addr, wk_addr_len);

    rc = oshmem_shmem_xchng(wk_local_addr, wk_addr_len, nprocs,
                            (void **)&wk_raddrs, &wk_roffs, &wk_rsizes);
    if (rc != OSHMEM_SUCCESS) {
        goto error;
    }

    opal_progress_register(spml_ucx_default_progress);

    mca_spml_ucx.remote_addrs_tbl = (char **)calloc(nprocs, sizeof(char *));
    memset(mca_spml_ucx.remote_addrs_tbl, 0, nprocs * sizeof(char *));

    /* Get the EP connection requests for all the processes from modex */
    for (n = 0; n < nprocs; ++n) {
        i = (my_rank + n) % nprocs;
        dump_address(i, (char *)(wk_raddrs + wk_roffs[i]), wk_rsizes[i]);

        ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address    = (ucp_address_t *)(wk_raddrs + wk_roffs[i]);

        err = ucp_ep_create(mca_spml_ucx_ctx_default.ucp_worker, &ep_params,
                            &mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn);
        if (UCS_OK != err) {
            SPML_UCX_ERROR("ucp_ep_create(proc=%zu/%zu) failed: %s", n, nprocs,
                           ucs_status_string(err));
            goto error2;
        }

        OSHMEM_PROC_DATA(procs[i])->num_transports = 1;
        OSHMEM_PROC_DATA(procs[i])->transport_ids = spml_ucx_transport_ids;

        for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) {
            mca_spml_ucx_ctx_default.ucp_peers[i].mkeys[j].key.rkey = NULL;
        }

        mca_spml_ucx.remote_addrs_tbl[i] = (char *)malloc(wk_rsizes[i]);
        memcpy(mca_spml_ucx.remote_addrs_tbl[i], (char *)(wk_raddrs + wk_roffs[i]),
               wk_rsizes[i]);
    }

    ucp_worker_release_address(mca_spml_ucx_ctx_default.ucp_worker, wk_local_addr);
    free(wk_raddrs);
    free(wk_rsizes);
    free(wk_roffs);

    SPML_UCX_VERBOSE(50, "*** ADDED PROCS ***");
    return OSHMEM_SUCCESS;

error2:
    for (i = 0; i < nprocs; ++i) {
         if (mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn) {
             ucp_ep_destroy(mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn);
         }
         if (mca_spml_ucx.remote_addrs_tbl[i]) {
             free(mca_spml_ucx.remote_addrs_tbl[i]);
         }
    }
    if (mca_spml_ucx_ctx_default.ucp_peers)
        free(mca_spml_ucx_ctx_default.ucp_peers);
    if (mca_spml_ucx.remote_addrs_tbl)
        free(mca_spml_ucx.remote_addrs_tbl);
    free(wk_raddrs);
    free(wk_rsizes);
    free(wk_roffs);
error:
    rc = OSHMEM_ERR_OUT_OF_RESOURCE;
    SPML_UCX_ERROR("add procs FAILED rc=%d", rc);
    return rc;

}
Пример #19
0
int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs)
{
    spml_ikrit_mxm_ep_conn_info_t *ep_info = NULL;
    spml_ikrit_mxm_ep_conn_info_t *ep_hw_rdma_info = NULL;
    spml_ikrit_mxm_ep_conn_info_t my_ep_info;
    size_t mxm_addr_len = MXM_MAX_ADDR_LEN;
    mxm_error_t err;
    size_t i, n;
    int rc = OSHMEM_ERROR;
    ompi_proc_t *proc_self;
    int my_rank = oshmem_my_proc_id();

    OBJ_CONSTRUCT(&mca_spml_ikrit.active_peers, opal_list_t);
    /* Allocate connection requests */
    ep_info = calloc(sizeof(spml_ikrit_mxm_ep_conn_info_t), nprocs);
    if (NULL == ep_info) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto bail;
    }

    if (mca_spml_ikrit.hw_rdma_channel) {
        ep_hw_rdma_info = calloc(sizeof(spml_ikrit_mxm_ep_conn_info_t), nprocs);
        if (NULL == ep_hw_rdma_info) {
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            goto bail;
        }
    }

    mca_spml_ikrit.mxm_peers = (mxm_peer_t *) calloc(nprocs , sizeof(mxm_peer_t));
    if (NULL == mca_spml_ikrit.mxm_peers) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto bail;
    }

    memset(&my_ep_info, 0, sizeof(my_ep_info));

    if (mca_spml_ikrit.hw_rdma_channel) {
        err = mxm_ep_get_address(mca_spml_ikrit.mxm_hw_rdma_ep, &my_ep_info.addr.ep_addr, &mxm_addr_len);
        if (MXM_OK != err) {
            orte_show_help("help-oshmem-spml-ikrit.txt", "unable to get endpoint address", true,
                    mxm_error_string(err));
            rc = OSHMEM_ERROR;
            goto bail;
        }
        oshmem_shmem_allgather(&my_ep_info, ep_hw_rdma_info,
                sizeof(spml_ikrit_mxm_ep_conn_info_t));
    }
    err = mxm_ep_get_address(mca_spml_ikrit.mxm_ep, &my_ep_info.addr.ep_addr, &mxm_addr_len);
    if (MXM_OK != err) {
        orte_show_help("help-oshmem-spml-ikrit.txt", "unable to get endpoint address", true,
                mxm_error_string(err));
        rc = OSHMEM_ERROR;
        goto bail;
    }

    oshmem_shmem_allgather(&my_ep_info, ep_info,
                           sizeof(spml_ikrit_mxm_ep_conn_info_t));

    opal_progress_register(spml_ikrit_progress);

    /* Get the EP connection requests for all the processes from modex */
    for (n = 0; n < nprocs; ++n) {

        /* mxm 2.0 keeps its connections on a list. Make sure
         * that list have different order on every rank */
        i = (my_rank + n) % nprocs;
        mxm_peer_construct(&mca_spml_ikrit.mxm_peers[i]);

        err = mxm_ep_connect(mca_spml_ikrit.mxm_ep, ep_info[i].addr.ep_addr, &mca_spml_ikrit.mxm_peers[i].mxm_conn);
        if (MXM_OK != err) {
            SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
            goto bail;
        }
        mxm_conn_ctx_set(mca_spml_ikrit.mxm_peers[i].mxm_conn, &mca_spml_ikrit.mxm_peers[i]);
        if (mca_spml_ikrit.hw_rdma_channel) {
            err = mxm_ep_connect(mca_spml_ikrit.mxm_hw_rdma_ep, ep_hw_rdma_info[i].addr.ep_addr, &mca_spml_ikrit.mxm_peers[i].mxm_hw_rdma_conn);
            if (MXM_OK != err) {
                SPML_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
                goto bail;
            }
        } else {
            mca_spml_ikrit.mxm_peers[i].mxm_hw_rdma_conn = mca_spml_ikrit.mxm_peers[i].mxm_conn;
        }
    }

    if (ep_info)
        free(ep_info);
    if (ep_hw_rdma_info)
        free(ep_hw_rdma_info);

    if (mca_spml_ikrit.bulk_connect) {
        /* Need a barrier to ensure remote peers already created connection */
        oshmem_shmem_barrier();
        mxm_ep_wireup(mca_spml_ikrit.mxm_ep);
    }

    proc_self = oshmem_proc_group_find(oshmem_group_all, my_rank);
    /* identify local processes and change transport to SHM */
    for (i = 0; i < nprocs; i++) {
        if (procs[i]->super.proc_name.jobid != proc_self->super.proc_name.jobid ||
            !OPAL_PROC_ON_LOCAL_NODE(procs[i]->super.proc_flags)) {
            continue;
        }
        if (procs[i] == proc_self)
            continue;

        /* use zcopy for put/get via sysv shared memory with fallback to RDMA */
        mca_spml_ikrit.mxm_peers[i].ptl_id = MXM_PTL_SHM;
    }

    SPML_VERBOSE(50, "*** ADDED PROCS ***");
    return OSHMEM_SUCCESS;

bail:
	if (ep_info)
		free(ep_info);
	if (ep_hw_rdma_info)
		free(ep_hw_rdma_info);
    SPML_ERROR("add procs FAILED rc=%d", rc);

    return rc;

}
Пример #20
0
static int component_init(bool enable_progress_threads, bool enable_mpi_threads) {
    ucp_config_t *config = NULL;
    ucp_params_t context_params;
    bool progress_registered = false, requests_created = false;
    int ret = OMPI_SUCCESS;
    ucs_status_t status;

    mca_osc_ucx_component.ucp_context = NULL;
    mca_osc_ucx_component.ucp_worker = NULL;
    mca_osc_ucx_component.enable_mpi_threads = enable_mpi_threads;

    status = ucp_config_read("MPI", NULL, &config);
    if (UCS_OK != status) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_config_read failed: %d\n",
                            __FILE__, __LINE__, status);
        return OMPI_ERROR;
    }

    OBJ_CONSTRUCT(&mca_osc_ucx_component.requests, opal_free_list_t);
    requests_created = true;
    ret = opal_free_list_init (&mca_osc_ucx_component.requests,
                               sizeof(ompi_osc_ucx_request_t),
                               opal_cache_line_size,
                               OBJ_CLASS(ompi_osc_ucx_request_t),
                               0, 0, 8, 0, 8, NULL, 0, NULL, NULL, NULL);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: opal_free_list_init failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    mca_osc_ucx_component.num_incomplete_req_ops = 0;

    ret = opal_progress_register(progress_callback);
    progress_registered = true;
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: opal_progress_register failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* initialize UCP context */

    memset(&context_params, 0, sizeof(ucp_context_h));
    context_params.field_mask = UCP_PARAM_FIELD_FEATURES |
                                UCP_PARAM_FIELD_MT_WORKERS_SHARED |
                                UCP_PARAM_FIELD_ESTIMATED_NUM_EPS |
                                UCP_PARAM_FIELD_REQUEST_INIT |
                                UCP_PARAM_FIELD_REQUEST_SIZE;
    context_params.features = UCP_FEATURE_RMA | UCP_FEATURE_AMO32 | UCP_FEATURE_AMO64;
    context_params.mt_workers_shared = 0;
    context_params.estimated_num_eps = ompi_proc_world_size();
    context_params.request_init = internal_req_init;
    context_params.request_size = sizeof(ompi_osc_ucx_internal_request_t);

    status = ucp_init(&context_params, config, &mca_osc_ucx_component.ucp_context);
    ucp_config_release(config);
    if (UCS_OK != status) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_init failed: %d\n",
                            __FILE__, __LINE__, status);
        ret = OMPI_ERROR;
        goto error;
    }

    return ret;
 error:
    if (progress_registered) opal_progress_unregister(progress_callback);
    if (requests_created) OBJ_DESTRUCT(&mca_osc_ucx_component.requests);
    if (mca_osc_ucx_component.ucp_context) ucp_cleanup(mca_osc_ucx_component.ucp_context);
    return ret;
}
Пример #21
0
int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
    psm2_error_t err;
    psm2_ep_t	ep; /* endpoint handle */
    psm2_mq_t	mq;
    psm2_epid_t	epid; /* unique lid+port identifier */
    psm2_uuid_t  unique_job_key;
    struct psm2_ep_open_opts ep_opt;
    unsigned long long *uu = (unsigned long long *) unique_job_key;
    char *generated_key;
    char env_string[256];
    int rc;

    generated_key = getenv("OMPI_MCA_orte_precondition_transports");
    memset(uu, 0, sizeof(psm2_uuid_t));

    if (!generated_key || (strlen(generated_key) != 33) ||
        sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
    {
      opal_show_help("help-mtl-psm2.txt",
		     "no uuid present", true,
		     generated_key ? "could not be parsed from" :
		     "not present in", ompi_process_info.nodename);
      return OMPI_ERROR;

    }

    /* Handle our own errors for opening endpoints */
    psm2_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler);

    /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM2 can allocate hardware
     * contexts correctly.
     */
    snprintf(env_string, sizeof(env_string), "%d", local_rank);
    setenv("MPI_LOCALRANKID", env_string, 0);
    snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
    setenv("MPI_LOCALNRANKS", env_string, 0);

    /* Setup the endpoint options. */
    psm2_ep_open_opts_get_defaults(&ep_opt);
    ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9;
    ep_opt.affinity = PSM2_EP_OPEN_AFFINITY_SKIP; /* do not let PSM2 set affinity */

    /* Open PSM2 endpoint */
    err = psm2_ep_open(unique_job_key, &ep_opt, &ep, &epid);
    if (err) {
      opal_show_help("help-mtl-psm2.txt",
		     "unable to open endpoint", true,
		     psm2_error_get_string(err));
      return OMPI_ERROR;
    }

    /* Future errors are handled by the default error handler */
    psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);

    err = psm2_mq_init(ep,
		      0xffff000000000000ULL,
		      NULL,
		      0,
		      &mq);
    if (err) {
      opal_show_help("help-mtl-psm2.txt",
		     "psm2 init", true,
		     psm2_error_get_string(err));
      return OMPI_ERROR;
    }

    ompi_mtl_psm2.ep   = ep;
    ompi_mtl_psm2.epid = epid;
    ompi_mtl_psm2.mq   = mq;

    OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
                    &mca_mtl_psm2_component.super.mtl_version,
                    &ompi_mtl_psm2.epid,
                    sizeof(psm2_epid_t));

    if (OMPI_SUCCESS != rc) {
	opal_output(0, "Open MPI couldn't send PSM2 epid to head node process");
	return OMPI_ERROR;
    }


    /* register the psm2 progress function */
    opal_progress_register(ompi_mtl_psm2_progress);

    return OMPI_SUCCESS;
}
Пример #22
0
int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { 
    psm_error_t err;
    psm_ep_t	ep; /* endpoint handle */
    psm_mq_t	mq;
    psm_epid_t	epid; /* unique lid+port identifier */
    psm_uuid_t  unique_job_key;
    struct psm_ep_open_opts ep_opt;
    unsigned long long *uu = (unsigned long long *) unique_job_key;
    char *generated_key;
    char env_string[256];
    
    generated_key = getenv("OMPI_MCA_orte_precondition_transports");
    memset(uu, 0, sizeof(psm_uuid_t));
    
    if (!generated_key || (strlen(generated_key) != 33) ||
        sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
    {
      opal_show_help("help-mtl-psm.txt",
		     "no uuid present", true,
		     generated_key ? "could not be parsed from" :
		     "not present in", ompi_process_info.nodename);
      return OMPI_ERROR;
      
    }

    /* Handle our own errors for opening endpoints */
    psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);

    /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware
     * contexts correctly.
     */
    snprintf(env_string, sizeof(env_string), "%d", local_rank);
    setenv("MPI_LOCALRANKID", env_string, 0);
    snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
    setenv("MPI_LOCALNRANKS", env_string, 0);
    
    /* Setup the endpoint options. */
    bzero((void*) &ep_opt, sizeof(ep_opt));
    ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9;
    ep_opt.unit = ompi_mtl_psm.ib_unit;
    ep_opt.affinity = PSM_EP_OPEN_AFFINITY_SKIP; /* do not let PSM set affinity */
    ep_opt.shm_mbytes = -1; /* Choose PSM defaults */
    ep_opt.sendbufs_num = -1; /* Choose PSM defaults */

#if PSM_VERNO >= 0x0101   
    ep_opt.network_pkey = ompi_mtl_psm.ib_pkey;
#endif
    
#if PSM_VERNO >= 0x0107
    ep_opt.port = ompi_mtl_psm.ib_port;
    ep_opt.outsl = ompi_mtl_psm.ib_service_level;
#endif

#if PSM_VERNO >= 0x010d
    ep_opt.service_id = ompi_mtl_psm.ib_service_id;
    ep_opt.path_res_type = ompi_mtl_psm.path_res_type;
#endif

    /* Open PSM endpoint */
    err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
    if (err) {
      opal_show_help("help-mtl-psm.txt",
		     "unable to open endpoint", true,
		     psm_error_get_string(err));
      return OMPI_ERROR;
    }

    /* Future errors are handled by the default error handler */
    psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
    
    err = psm_mq_init(ep, 
		      0xffff000000000000ULL, 
		      NULL,
		      0,
		      &mq);
    if (err) {
      opal_show_help("help-mtl-psm.txt",
		     "psm init", true,
		     psm_error_get_string(err));
      return OMPI_ERROR;
    }

    ompi_mtl_psm.ep   = ep;
    ompi_mtl_psm.epid = epid;
    ompi_mtl_psm.mq   = mq;

    if (OMPI_SUCCESS != 
	ompi_modex_send( &mca_mtl_psm_component.super.mtl_version, 
                             &ompi_mtl_psm.epid, 
			     sizeof(psm_epid_t))) {
	opal_output(0, "Open MPI couldn't send PSM epid to head node process"); 
	return OMPI_ERROR;
    }

    /* register the psm progress function */
    opal_progress_register(ompi_mtl_psm_progress);
        
    return OMPI_SUCCESS;
}
Пример #23
0
static mca_mtl_base_module_t*
ompi_mtl_ofi_component_init(bool enable_progress_threads,
                            bool enable_mpi_threads)
{
    int ret, fi_version;
    struct fi_info *hints;
    struct fi_info *providers = NULL, *prov = NULL;
    struct fi_cq_attr cq_attr = {0};
    struct fi_av_attr av_attr = {0};
    char ep_name[FI_NAME_MAX] = {0};
    size_t namelen;

    /**
     * Hints to filter providers
     * See man fi_getinfo for a list of all filters
     * mode:  Select capabilities MTL is prepared to support.
     *        In this case, MTL will pass in context into communication calls
     * ep_type:  reliable datagram operation
     * caps:     Capabilities required from the provider.
     *           Tag matching is specified to implement MPI semantics.
     * msg_order: Guarantee that messages with same tag are ordered.
     */
    hints = fi_allocinfo();
    if (!hints) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: Could not allocate fi_info\n",
                            __FILE__, __LINE__);
        goto error;
    }
    hints->mode               = FI_CONTEXT;
    hints->ep_attr->type      = FI_EP_RDM;      /* Reliable datagram         */
    hints->caps               = FI_TAGGED;      /* Tag matching interface    */
    hints->tx_attr->msg_order = FI_ORDER_SAS;
    hints->rx_attr->msg_order = FI_ORDER_SAS;

    hints->domain_attr->threading        = FI_THREAD_UNSPEC;

    if (MTL_OFI_PROG_AUTO == control_progress) {
        hints->domain_attr->control_progress = FI_PROGRESS_AUTO;
    } else {
        hints->domain_attr->control_progress = FI_PROGRESS_MANUAL;
    }

    if (MTL_OFI_PROG_MANUAL == data_progress) {
        hints->domain_attr->data_progress = FI_PROGRESS_MANUAL;
    } else {
        hints->domain_attr->data_progress = FI_PROGRESS_AUTO;
    }

    if (MTL_OFI_AV_TABLE == av_type) {
        hints->domain_attr->av_type          = FI_AV_TABLE;
    } else {
        hints->domain_attr->av_type          = FI_AV_MAP;
    }

    hints->domain_attr->resource_mgmt    = FI_RM_ENABLED;

    /**
     * FI_VERSION provides binary backward and forward compatibility support
     * Specify the version of OFI is coded to, the provider will select struct
     * layouts that are compatible with this version.
     */
    fi_version = FI_VERSION(1, 0);

    /**
     * fi_getinfo:  returns information about fabric  services for reaching a
     * remote node or service.  this does not necessarily allocate resources.
     * Pass NULL for name/service because we want a list of providers supported.
     */
    ret = fi_getinfo(fi_version,    /* OFI version requested                    */
                     NULL,          /* Optional name or fabric to resolve       */
                     NULL,          /* Optional service name or port to request */
                     0ULL,          /* Optional flag                            */
                     hints,        /* In: Hints to filter providers            */
                     &providers);   /* Out: List of matching providers          */
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_getinfo failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Select a provider from the list returned by fi_getinfo().
     */
    prov = select_ofi_provider(providers);
    if (!prov) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: select_ofi_provider: no provider found\n",
                            __FILE__, __LINE__);
        goto error;
    }


    /**
     * Open fabric
     * The getinfo struct returns a fabric attribute struct that can be used to
     * instantiate the virtual or physical network. This opens a "fabric
     * provider". See man fi_fabric for details.
     */
    ret = fi_fabric(prov->fabric_attr,    /* In:  Fabric attributes             */
                    &ompi_mtl_ofi.fabric, /* Out: Fabric handle                 */
                    NULL);                /* Optional context for fabric events */
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_fabric failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Create the access domain, which is the physical or virtual network or
     * hardware port/collection of ports.  Returns a domain object that can be
     * used to create endpoints.  See man fi_domain for details.
     */
    ret = fi_domain(ompi_mtl_ofi.fabric,  /* In:  Fabric object                 */
                    prov,                 /* In:  Provider                      */
                    &ompi_mtl_ofi.domain, /* Out: Domain oject                  */
                    NULL);                /* Optional context for domain events */
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_domain failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Create a transport level communication endpoint.  To use the endpoint,
     * it must be bound to completion counters or event queues and enabled,
     * and the resources consumed by it, such as address vectors, counters,
     * completion queues, etc.
     * see man fi_endpoint for more details.
     */
    ret = fi_endpoint(ompi_mtl_ofi.domain, /* In:  Domain object   */
                      prov,                /* In:  Provider        */
                      &ompi_mtl_ofi.ep,    /* Out: Endpoint object */
                      NULL);               /* Optional context     */
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_endpoint failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Save the maximum inject size.
     */
    ompi_mtl_ofi.max_inject_size = prov->tx_attr->inject_size;

    /**
     * Create the objects that will be bound to the endpoint.
     * The objects include:
     *     - completion queue for events
     *     - address vector of other endpoint addresses
     *     - dynamic memory-spanning memory region
     */
    cq_attr.format = FI_CQ_FORMAT_TAGGED;
    ret = fi_cq_open(ompi_mtl_ofi.domain, &cq_attr, &ompi_mtl_ofi.cq, NULL);
    if (ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_cq_open failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * The remote fi_addr will be stored in the ofi_endpoint struct.
     */

    av_attr.type = (MTL_OFI_AV_TABLE == av_type) ? FI_AV_TABLE: FI_AV_MAP;

    ret = fi_av_open(ompi_mtl_ofi.domain, &av_attr, &ompi_mtl_ofi.av, NULL);
    if (ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_av_open failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Bind the CQ and AV to the endpoint object.
     */
    ret = fi_ep_bind(ompi_mtl_ofi.ep,
                     (fid_t)ompi_mtl_ofi.cq,
                     FI_SEND | FI_RECV);
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_bind CQ-EP failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    ret = fi_ep_bind(ompi_mtl_ofi.ep,
                     (fid_t)ompi_mtl_ofi.av,
                     0);
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_bind AV-EP failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Enable the endpoint for communication
     * This commits the bind operations.
     */
    ret = fi_enable(ompi_mtl_ofi.ep);
    if (0 != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_enable failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    /**
     * Free providers info since it's not needed anymore.
     */
    fi_freeinfo(hints);
    hints = NULL;
    fi_freeinfo(providers);
    providers = NULL;

    /**
     * Get our address and publish it with modex.
     */
    namelen = sizeof(ep_name);
    ret = fi_getname((fid_t)ompi_mtl_ofi.ep, &ep_name[0], &namelen);
    if (ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: fi_getname failed: %s\n",
                            __FILE__, __LINE__, fi_strerror(-ret));
        goto error;
    }

    OFI_COMPAT_MODEX_SEND(ret,
                          &mca_mtl_ofi_component.super.mtl_version,
                          &ep_name,
                          namelen);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: modex_send failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    ompi_mtl_ofi.epnamelen = namelen;

    /**
     * Set the ANY_SRC address.
     */
    ompi_mtl_ofi.any_addr = FI_ADDR_UNSPEC;

    /**
     * Activate progress callback.
     */
    ret = opal_progress_register(ompi_mtl_ofi_progress_no_inline);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: opal_progress_register failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    return &ompi_mtl_ofi.base;

error:
    if (providers) {
        (void) fi_freeinfo(providers);
    }
    if (hints) {
        (void) fi_freeinfo(hints);
    }
    if (ompi_mtl_ofi.av) {
        (void) fi_close((fid_t)ompi_mtl_ofi.av);
    }
    if (ompi_mtl_ofi.cq) {
        (void) fi_close((fid_t)ompi_mtl_ofi.cq);
    }
    if (ompi_mtl_ofi.ep) {
        (void) fi_close((fid_t)ompi_mtl_ofi.ep);
    }
    if (ompi_mtl_ofi.domain) {
        (void) fi_close((fid_t)ompi_mtl_ofi.domain);
    }
    if (ompi_mtl_ofi.fabric) {
        (void) fi_close((fid_t)ompi_mtl_ofi.fabric);
    }
    return NULL;
}
/*
 * Open the component
 */
static int iboffload_open(void)
{
    int rc;

    /* local variables */
    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;

    IBOFFLOAD_VERBOSE(10, ("Open Iboffload component.\n"));

    (void) mca_bcol_iboffload_verify_params();

    cm->super.priority = 100;
    cm->super.n_net_contexts = 0;
    cm->super.network_contexts = NULL;

    OBJ_CONSTRUCT(&cm->recv_wrs.lock, opal_mutex_t);

    /* construct lists */
    OBJ_CONSTRUCT(&cm->devices, opal_pointer_array_t);
    rc = opal_pointer_array_init(&cm->devices, 10, INT_MAX, 10);
    if (OMPI_SUCCESS != rc) {
        goto close_device;
    }

    /* Check MCA parameters */
    if (0 != (mca_bcol_iboffload_component.exchange_tree_order & (mca_bcol_iboffload_component.exchange_tree_order - 1))) {
        IBOFFLOAD_ERROR(("Warning: ibcol_iboffload_exchange_tree_order is %d which is not a power of 2, setting it to 2", 
                         mca_bcol_iboffload_component.exchange_tree_order));
        mca_bcol_iboffload_component.exchange_tree_order = 2;
    }

    /* Pasha: Since we do not have max inline check like in openib,
       I will put some dummy check here. All mlnx devices support at least 512b */
    if (mca_bcol_iboffload_component.max_inline_data > 512) {
        IBOFFLOAD_ERROR(("Warning the inline %d, is to big and unsupported",
                    mca_bcol_iboffload_component.max_inline_data));
        rc = OMPI_ERROR;
        goto close_device;
    }

    /* Register the progress function */
    rc = opal_progress_register(mca_bcol_iboffload_component_progress);
    if (OMPI_SUCCESS != rc) {
        IBOFFLOAD_ERROR(("Failed to register the progress function"
                         " for iboffload component.\n"));
        goto close_device;
    }

    map_ompi_to_ib_dtype();
    map_ompi_to_ib_op_type();

    /* The init_done set to true on first component usage */
    cm->init_done = false;

    return OMPI_SUCCESS;

close_device:
    OBJ_DESTRUCT(&cm->devices);
    OBJ_DESTRUCT(&cm->recv_wrs.lock);
    return rc;
}
Пример #25
0
int ompi_mtl_mxm_module_init(void)
{
#if MXM_API < MXM_VERSION(2,0)
    ompi_mtl_mxm_ep_conn_info_t ep_info;
#endif
    void *ep_address;
    size_t ep_address_len;
    mxm_error_t err;
    uint32_t jobid;
    uint64_t mxlr;
    ompi_proc_t **procs;
    unsigned ptl_bitmap;
    size_t totps, proc;
    int lr, nlps;
    int rc;

    mxlr = 0;
    lr = -1;

    jobid = ompi_mtl_mxm_get_job_id();
    if (0 == jobid) {
        MXM_ERROR("Failed to generate jobid");
        return OMPI_ERROR;
    }

    if (NULL == (procs = ompi_proc_world(&totps))) {
        MXM_ERROR("Unable to obtain process list");
        return OMPI_ERROR;
    }

    if (totps < (size_t)ompi_mtl_mxm.mxm_np) {
        MXM_VERBOSE(1, "MXM support will be disabled because of total number "
                    "of processes (%lu) is less than the minimum set by the "
                    "mtl_mxm_np MCA parameter (%u)", totps, ompi_mtl_mxm.mxm_np);
        return OMPI_ERR_NOT_SUPPORTED;
    }
    MXM_VERBOSE(1, "MXM support enabled");

    if (ORTE_NODE_RANK_INVALID == (lr = ompi_process_info.my_node_rank)) {
        MXM_ERROR("Unable to obtain local node rank");
        return OMPI_ERROR;
    }
    nlps = ompi_process_info.num_local_peers + 1;

    for (proc = 0; proc < totps; proc++) {
        if (OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags)) {
            mxlr = max(mxlr, procs[proc]->proc_name.vpid);
        }
    }

    /* Setup the endpoint options and local addresses to bind to. */
#if MXM_API < MXM_VERSION(2,0)
    ptl_bitmap = ompi_mtl_mxm.mxm_ctx_opts->ptl_bitmap;
#else
    ptl_bitmap = 0;
#endif

    /* Open MXM endpoint */
    err = ompi_mtl_mxm_create_ep(ompi_mtl_mxm.mxm_context, &ompi_mtl_mxm.ep,
                                 ptl_bitmap, lr, jobid, mxlr, nlps);
    if (MXM_OK != err) {
        opal_show_help("help-mtl-mxm.txt", "unable to create endpoint", true,
                       mxm_error_string(err));
        return OMPI_ERROR;
    }

    /*
     * Get address for each PTL on this endpoint, and share it with other ranks.
     */
#if MXM_API < MXM_VERSION(2,0)
    if ((ptl_bitmap & MXM_BIT(MXM_PTL_SELF)) &&
            OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SELF)) {
        return OMPI_ERROR;
    }
    if ((ptl_bitmap & MXM_BIT(MXM_PTL_RDMA)) &&
            OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_RDMA)) {
        return OMPI_ERROR;
    }
    if ((ptl_bitmap & MXM_BIT(MXM_PTL_SHM)) &&
            OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SHM)) {
        return OMPI_ERROR;
    }

    ep_address = &ep_info;
    ep_address_len = sizeof(ep_info);
#else
    rc = ompi_mtl_mxm_get_ep_address(&ep_address, &ep_address_len);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }
#endif

    rc = ompi_mtl_mxm_send_ep_address(ep_address, ep_address_len);
    if (OMPI_SUCCESS != rc) {
        MXM_ERROR("Modex session failed.");
        return rc;
    }

#if MXM_API >= MXM_VERSION(2,0)
    free(ep_address);
#endif

    /* Register the MXM progress function */
    opal_progress_register(ompi_mtl_mxm_progress);

#if MXM_API >= MXM_VERSION(2,0)
    if (ompi_mtl_mxm.using_mem_hooks) {
        opal_mem_hooks_register_release(ompi_mtl_mxm_mem_release_cb, NULL);
    }
#endif
    return OMPI_SUCCESS;
}
Пример #26
0
int NBC_Schedule_request(NBC_Schedule *schedule, ompi_communicator_t *comm,
                         ompi_coll_libnbc_module_t *module, bool persistent,
                         ompi_request_t **request, void *tmpbuf) {
  int ret, tmp_tag;
  bool need_register = false;
  ompi_coll_libnbc_request_t *handle;

  /* no operation (e.g. one process barrier)? */
  if (((int *)schedule->data)[0] == 0 && schedule->data[sizeof(int)] == 0) {
    ret = nbc_get_noop_request(persistent, request);
    if (OMPI_SUCCESS != ret) {
      return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* update the module->tag here because other processes may have operations
     * and they may update the module->tag */
    OPAL_THREAD_LOCK(&module->mutex);
    tmp_tag = module->tag--;
    if (tmp_tag == MCA_COLL_BASE_TAG_NONBLOCKING_END) {
      tmp_tag = module->tag = MCA_COLL_BASE_TAG_NONBLOCKING_BASE;
      NBC_DEBUG(2,"resetting tags ...\n");
    }
    OPAL_THREAD_UNLOCK(&module->mutex);

    OBJ_RELEASE(schedule);
    free(tmpbuf);

    return OMPI_SUCCESS;
  }

  OMPI_COLL_LIBNBC_REQUEST_ALLOC(comm, persistent, handle);
  if (NULL == handle) return OMPI_ERR_OUT_OF_RESOURCE;

  handle->tmpbuf = NULL;
  handle->req_count = 0;
  handle->req_array = NULL;
  handle->comm = comm;
  handle->schedule = NULL;
  handle->row_offset = 0;
  handle->nbc_complete = persistent ? true : false;

  /******************** Do the tag and shadow comm administration ...  ***************/

  OPAL_THREAD_LOCK(&module->mutex);
  tmp_tag = module->tag--;
  if (tmp_tag == MCA_COLL_BASE_TAG_NONBLOCKING_END) {
      tmp_tag = module->tag = MCA_COLL_BASE_TAG_NONBLOCKING_BASE;
      NBC_DEBUG(2,"resetting tags ...\n");
  }

  if (true != module->comm_registered) {
      module->comm_registered = true;
      need_register = true;
  }
  OPAL_THREAD_UNLOCK(&module->mutex);

  handle->tag = tmp_tag;

  /* register progress */
  if (need_register) {
      int32_t tmp =
          OPAL_THREAD_ADD_FETCH32(&mca_coll_libnbc_component.active_comms, 1);
      if (tmp == 1) {
          opal_progress_register(ompi_coll_libnbc_progress);
      }
  }

  handle->comm=comm;
  /*printf("got module: %lu tag: %i\n", module, module->tag);*/

  /******************** end of tag and shadow comm administration ...  ***************/
  handle->comminfo = module;

  NBC_DEBUG(3, "got tag %i\n", handle->tag);

  handle->tmpbuf = tmpbuf;
  handle->schedule = schedule;
  *request = (ompi_request_t *) handle;

  return OMPI_SUCCESS;
}
Пример #27
0
int ompio_io_ompio_file_iread (mca_io_ompio_file_t *fh,
			       void *buf,
			       int count,
			       struct ompi_datatype_t *datatype,
			       ompi_request_t **request)
{
    int ret = OMPI_SUCCESS;
    mca_ompio_request_t *ompio_req=NULL;

    ompio_req = OBJ_NEW(mca_ompio_request_t);
    ompio_req->req_type = MCA_OMPIO_REQUEST_READ;
    ompio_req->req_ompi.req_state = OMPI_REQUEST_ACTIVE;

    if ( 0 == count ) {
	ompi_request_complete (&ompio_req->req_ompi, 0);
	ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
	ompio_req->req_ompi.req_status._ucount = 0;
	return OMPI_SUCCESS;
    }

    if ( NULL != fh->f_fbtl->fbtl_ipreadv ) {
	// This fbtl has support for non-blocking operations

	size_t total_bytes_read = 0;       /* total bytes that have been read*/
	uint32_t iov_count = 0;
	struct iovec *decoded_iov = NULL;

	size_t max_data = 0;
	int i = 0; /* index into the decoded iovec of the buffer */
	int j = 0; /* index into the file vie iovec */

	ompi_io_ompio_decode_datatype (fh,
				       datatype,
				       count,
				       buf,
				       &max_data,
				       &decoded_iov,
				       &iov_count);

	// Non-blocking operations have to occur in a single cycle
	j = fh->f_index_in_file_view;

	mca_io_ompio_build_io_array ( fh,
				      0,         // index
				      1,         // no. of cyces
				      max_data,  // setting bytes per cycle to match data
				      max_data,
				      iov_count,
				      decoded_iov,
				      &i,
				      &j,
				      &total_bytes_read);

	if (fh->f_num_of_io_entries) {
	  fh->f_fbtl->fbtl_ipreadv (fh, (ompi_request_t *) ompio_req);
	}

	if ( false == mca_io_ompio_progress_is_registered ) {
            // Lazy initialization of progress function to minimize impact
            // on other ompi functionality in case its not used.
            opal_progress_register (mca_io_ompio_component_progress);
            mca_io_ompio_progress_is_registered=true;
        }

	fh->f_num_of_io_entries = 0;
	if (NULL != fh->f_io_array) {
	    free (fh->f_io_array);
	    fh->f_io_array = NULL;
	}

	if (NULL != decoded_iov) {
	    free (decoded_iov);
	    decoded_iov = NULL;
	}
    }
    else {
	// This fbtl does not  support non-blocking operations
	ompi_status_public_t status;
	ret = ompio_io_ompio_file_read (fh, buf, count, datatype, &status);

	ompi_request_complete (&ompio_req->req_ompi, 0);
	ompio_req->req_ompi.req_status.MPI_ERROR = ret;
	ompio_req->req_ompi.req_status._ucount = status._ucount;
    }

    *request = (ompi_request_t *) ompio_req;
    return ret;
}
Пример #28
0
int mca_spml_ucx_add_procs(oshmem_proc_t** procs, size_t nprocs)
{
    size_t i, n;
    int rc = OSHMEM_ERROR;
    int my_rank = oshmem_my_proc_id();
    ucs_status_t err;
    ucp_address_t *wk_local_addr;
    size_t wk_addr_len;
    int *wk_roffs, *wk_rsizes;
    char *wk_raddrs;


    mca_spml_ucx.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(mca_spml_ucx.ucp_peers)));
    if (NULL == mca_spml_ucx.ucp_peers) {
        goto error;
    }

    err = ucp_worker_get_address(mca_spml_ucx.ucp_worker, &wk_local_addr, &wk_addr_len);
    if (err != UCS_OK) {
        goto error;
    }
    dump_address(my_rank, (char *)wk_local_addr, wk_addr_len);

    rc = oshmem_shmem_xchng(wk_local_addr, wk_addr_len, nprocs,
            (void **)&wk_raddrs, &wk_roffs, &wk_rsizes);
    if (rc != OSHMEM_SUCCESS) {
        goto error;
    }

    opal_progress_register(spml_ucx_progress);

    /* Get the EP connection requests for all the processes from modex */
    for (n = 0; n < nprocs; ++n) {
        i = (my_rank + n) % nprocs;
        dump_address(i, (char *)(wk_raddrs + wk_roffs[i]), wk_rsizes[i]);
        err = ucp_ep_create(mca_spml_ucx.ucp_worker, 
                (ucp_address_t *)(wk_raddrs + wk_roffs[i]),
                &mca_spml_ucx.ucp_peers[i].ucp_conn);
        if (UCS_OK != err) {
            SPML_ERROR("ucp_ep_create failed!!!\n");
            goto error2;
        }
        procs[i]->num_transports = 1;
        procs[i]->transport_ids = spml_ucx_transport_ids;
    }

    ucp_worker_release_address(mca_spml_ucx.ucp_worker, wk_local_addr);
    free(wk_raddrs);
    free(wk_rsizes);
    free(wk_roffs);

    SPML_VERBOSE(50, "*** ADDED PROCS ***");
    return OSHMEM_SUCCESS;

error2:
    for (i = 0; i < nprocs; ++i) {
         if (mca_spml_ucx.ucp_peers[i].ucp_conn) {
             ucp_ep_destroy(mca_spml_ucx.ucp_peers[i].ucp_conn);
         }
    }
    if (mca_spml_ucx.ucp_peers) 
        free(mca_spml_ucx.ucp_peers);
    if (wk_raddrs)
        free(wk_raddrs);
    if (wk_rsizes)
        free(wk_rsizes);
    if (wk_roffs)
        free(wk_roffs);
    if (mca_spml_ucx.ucp_peers)
        free(mca_spml_ucx.ucp_peers);
error:
    rc = OSHMEM_ERR_OUT_OF_RESOURCE;
    SPML_ERROR("add procs FAILED rc=%d", rc);
    return rc;

}
Пример #29
0
static int
component_init(bool enable_progress_threads, bool enable_mpi_threads)
{
    int ret;
    ptl_ni_limits_t actual;

    ret = PtlInit();
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlInit failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlNIInit(PTL_IFACE_DEFAULT,
                    PTL_NI_PHYSICAL | PTL_NI_MATCHING,
                    PTL_PID_ANY,
                    NULL,
                    &actual,
                    &mca_osc_portals4_component.matching_ni_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlNIInit failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    /* BWB: FIX ME: Need to make sure our ID matches with the MTL... */

    mca_osc_portals4_component.matching_atomic_max = actual.max_atomic_size;
    mca_osc_portals4_component.matching_fetch_atomic_max = actual.max_fetch_atomic_size;
    mca_osc_portals4_component.matching_atomic_ordered_size =
        MAX(actual.max_waw_ordered_size, actual.max_war_ordered_size);

    ret = PtlEQAlloc(mca_osc_portals4_component.matching_ni_h,
                     4096,
                     &mca_osc_portals4_component.matching_eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    ret = PtlPTAlloc(mca_osc_portals4_component.matching_ni_h,
                     0,
                     mca_osc_portals4_component.matching_eq_h,
                     4,
                     &mca_osc_portals4_component.matching_pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    OBJ_CONSTRUCT(&mca_osc_portals4_component.requests, opal_free_list_t);
    ret = opal_free_list_init (&mca_osc_portals4_component.requests,
                               sizeof(ompi_osc_portals4_request_t),
                               opal_cache_line_size,
                               OBJ_CLASS(ompi_osc_portals4_request_t),
                               0, 0, 8, 0, 8, NULL, 0, NULL, NULL, NULL);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: opal_free_list_init failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    ret = opal_progress_register(progress_callback);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: opal_progress_register failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    return OMPI_SUCCESS;
}
Пример #30
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    mca_coll_base_module_t *module;
    mca_coll_hcoll_module_t *hcoll_module;
    ompi_attribute_fn_ptr_union_t del_fn;
    ompi_attribute_fn_ptr_union_t copy_fn;
    mca_coll_hcoll_component_t *cm;
    int err;
    int rc;
    cm = &mca_coll_hcoll_component;
    *priority = 0;
    module = NULL;

    if (!cm->hcoll_enable) {
        return NULL;
    }

    if (OMPI_COMM_IS_INTER(comm) || ompi_comm_size(comm) < cm->hcoll_np
            || ompi_comm_size(comm) < 2) {
        return NULL;
    }


    if (!cm->libhcoll_initialized)
    {
        /* libhcoll should be initialized here since current implmentation of
           mxm bcol in libhcoll needs world_group fully functional during init
           world_group, i.e. ompi_comm_world, is not ready at hcoll component open
           call */
        opal_progress_register(mca_coll_hcoll_progress);

        HCOL_VERBOSE(10,"Calling hcoll_init();");
#if HCOLL_API >= HCOLL_VERSION(3,2)
        hcoll_read_init_opts(&cm->init_opts);
        cm->init_opts->base_tag = MCA_COLL_BASE_TAG_HCOLL_BASE;
        cm->init_opts->max_tag = mca_pml.pml_max_tag;
        cm->init_opts->enable_thread_support = ompi_mpi_thread_multiple;

        rc = hcoll_init_with_opts(&cm->init_opts);
#else
        hcoll_set_runtime_tag_offset(MCA_COLL_BASE_TAG_HCOLL_BASE, mca_pml.pml_max_tag);
        rc = hcoll_init();
#endif

        if (HCOLL_SUCCESS != rc) {
            cm->hcoll_enable = 0;
            opal_progress_unregister(mca_coll_hcoll_progress);
            HCOL_ERROR("Hcol library init failed");
            return NULL;
        }

#if HCOLL_API >= HCOLL_VERSION(3,2)
        if (cm->using_mem_hooks && cm->init_opts->mem_hook_needed) {
#else
        if (cm->using_mem_hooks && hcoll_check_mem_release_cb_needed()) {
#endif
            opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL);
        } else {
            cm->using_mem_hooks = 0;
        }

        copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN;
        del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn;
        err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &hcoll_comm_attr_keyval, NULL ,0, NULL);
        if (OMPI_SUCCESS != err) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
            HCOL_ERROR("Hcol comm keyval create failed");
            return NULL;
        }

        if (mca_coll_hcoll_component.derived_types_support_enabled) {
            copy_fn.attr_datatype_copy_fn = (MPI_Type_internal_copy_attr_function *) MPI_TYPE_NULL_COPY_FN;
            del_fn.attr_datatype_delete_fn = hcoll_type_attr_del_fn;
            err = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, del_fn, &hcoll_type_attr_keyval, NULL ,0, NULL);
            if (OMPI_SUCCESS != err) {
                cm->hcoll_enable = 0;
                hcoll_finalize();
                opal_progress_unregister(mca_coll_hcoll_progress);
                HCOL_ERROR("Hcol type keyval create failed");
                return NULL;
            }
        }
        OBJ_CONSTRUCT(&cm->dtypes, opal_free_list_t);
        opal_free_list_init(&cm->dtypes, sizeof(mca_coll_hcoll_dtype_t),
                            8, OBJ_CLASS(mca_coll_hcoll_dtype_t), 0, 0,
                            32, -1, 32, NULL, 0, NULL, NULL, NULL);

    }

    hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);
    if (!hcoll_module) {
        if (!cm->libhcoll_initialized) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
        }
        return NULL;
    }

    hcoll_module->comm = comm;

    HCOL_VERBOSE(10,"Creating hcoll_context for comm %p, comm_id %d, comm_size %d",
                 (void*)comm,comm->c_contextid,ompi_comm_size(comm));

    hcoll_module->hcoll_context =
        hcoll_create_context((rte_grp_handle_t)comm);

    if (NULL == hcoll_module->hcoll_context) {
        HCOL_VERBOSE(1,"hcoll_create_context returned NULL");
        OBJ_RELEASE(hcoll_module);
        if (!cm->libhcoll_initialized) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
        }
        return NULL;
    }

    hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable;
    hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL;
    hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL;
    hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL;
    hcoll_module->super.coll_allgatherv = hcoll_collectives.coll_allgatherv ? mca_coll_hcoll_allgatherv : NULL;
    hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL;
    hcoll_module->super.coll_alltoall = hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : NULL;
    hcoll_module->super.coll_alltoallv = hcoll_collectives.coll_alltoallv ? mca_coll_hcoll_alltoallv : NULL;
    hcoll_module->super.coll_gatherv = hcoll_collectives.coll_gatherv ? mca_coll_hcoll_gatherv : NULL;
    hcoll_module->super.coll_reduce = hcoll_collectives.coll_reduce ? mca_coll_hcoll_reduce : NULL;
    hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL;
    hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL;
    hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL;
#if HCOLL_API >= HCOLL_VERSION(3,5)
    hcoll_module->super.coll_iallgatherv = hcoll_collectives.coll_iallgatherv ? mca_coll_hcoll_iallgatherv : NULL;
#else
    hcoll_module->super.coll_iallgatherv = NULL;
#endif
    hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL;
#if HCOLL_API >= HCOLL_VERSION(3,5)
    hcoll_module->super.coll_ireduce = hcoll_collectives.coll_ireduce ? mca_coll_hcoll_ireduce : NULL;
#else
    hcoll_module->super.coll_ireduce = NULL;
#endif
    hcoll_module->super.coll_gather = /*hcoll_collectives.coll_gather ? mca_coll_hcoll_gather :*/ NULL;
    hcoll_module->super.coll_igatherv = hcoll_collectives.coll_igatherv ? mca_coll_hcoll_igatherv : NULL;
    hcoll_module->super.coll_ialltoall = /*hcoll_collectives.coll_ialltoall ? mca_coll_hcoll_ialltoall : */ NULL;
#if HCOLL_API >= HCOLL_VERSION(3,7)
    hcoll_module->super.coll_ialltoallv = hcoll_collectives.coll_ialltoallv ? mca_coll_hcoll_ialltoallv : NULL;
#else
    hcoll_module->super.coll_ialltoallv = NULL;
#endif
    *priority = cm->hcoll_priority;
    module = &hcoll_module->super;

    if (!cm->libhcoll_initialized) {
        cm->libhcoll_initialized = true;
    }

    return module;
}


OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_t,
                   mca_coll_base_module_t,
                   mca_coll_hcoll_module_construct,
                   mca_coll_hcoll_module_destruct);