Esempio n. 1
0
/**
 * Start a matched receive on a previously matched message.
 *
 * Obtains the receive buffer from the convertor, fills in the portals4
 * receive request, resets *message to MPI_MESSAGE_NULL and then hands the
 * event stored in the matched message to ompi_mtl_portals4_recv_progress().
 *
 * @param mtl          MTL module (not referenced in this function)
 * @param convertor    convertor describing the receive buffer
 * @param message      in: the matched message; out: MPI_MESSAGE_NULL
 * @param mtl_request  request to fill in; used as a portals4 recv request
 *
 * @return the error from ompi_mtl_datatype_recv_buf() when buffer setup
 *         fails, otherwise the result of ompi_mtl_portals4_recv_progress()
 */
int
ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl,
                         struct opal_convertor_t *convertor,
                         struct ompi_message_t **message,
                         struct mca_mtl_request_t *mtl_request)
{
    /* grab the matched message before *message is overwritten below */
    ompi_mtl_portals4_message_t *matched =
        (ompi_mtl_portals4_message_t*) (*message)->req_ptr;
    ompi_mtl_portals4_recv_request_t *recv_req =
        (ompi_mtl_portals4_recv_request_t*) mtl_request;
    void *buf;
    size_t buf_len;
    bool owns_buf;
    int rc;

    rc = ompi_mtl_datatype_recv_buf(convertor, &buf, &buf_len, &owns_buf);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        return rc;
    }

#if OPAL_ENABLE_DEBUG
    recv_req->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
    recv_req->hdr_data = 0;
#endif

    /* request bookkeeping */
    recv_req->super.type = portals4_req_recv;
    recv_req->super.event_callback = ompi_mtl_portals4_recv_progress;
    recv_req->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
    recv_req->pending_reply = 0;

    /* buffer description; buffer_ptr is only recorded when free_after was set */
    recv_req->convertor = convertor;
    recv_req->delivery_ptr = buf;
    recv_req->delivery_len = buf_len;
    if (owns_buf) {
        recv_req->buffer_ptr = buf;
    } else {
        recv_req->buffer_ptr = NULL;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Mrecv %lu of length %ld (0x%lx)\n",
                         recv_req->opcount,
                         (int64_t)buf_len, (unsigned long) recv_req));

    (*message) = MPI_MESSAGE_NULL;

    return ompi_mtl_portals4_recv_progress(&(matched->ev), &recv_req->super);
}
Esempio n. 2
0
/**
 * Non-blocking gatherv through hcoll, with fallback to the previous module.
 *
 * Both datatypes are mapped to hcoll representations. If either mapping is
 * "zero" (unsupported), or if the hcoll call does not return HCOLL_SUCCESS,
 * the call is forwarded unchanged to previous_igatherv.
 *
 * @return HCOLL_SUCCESS's value when hcoll accepted the operation, otherwise
 *         the return code of the fallback igatherv
 */
int mca_coll_hcoll_igatherv(const void* sbuf, int scount,
                            struct ompi_datatype_t *sdtype,
                            void* rbuf, const int *rcounts, const int *displs,
                            struct ompi_datatype_t *rdtype,
                            int root,
                            struct ompi_communicator_t *comm,
                            ompi_request_t ** request,
                            mca_coll_base_module_t *module)
{
    mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module;
    dte_data_representation_t stype;
    dte_data_representation_t rtype;
    int rc;

    HCOL_VERBOSE(20,"RUNNING HCOL IGATHERV");

    stype = ompi_dtype_2_hcoll_dtype(sdtype, NO_DERIVED);
    rtype = ompi_dtype_2_hcoll_dtype(rdtype, NO_DERIVED);

    if (OPAL_LIKELY(!HCOL_DTE_IS_ZERO(stype) && !HCOL_DTE_IS_ZERO(rtype))) {
        /* hcoll receives the request slot as an opaque handle pointer */
        rc = hcoll_collectives.coll_igatherv((void *)sbuf, scount, stype, rbuf,
                                             (int *)rcounts, (int *)displs, rtype, root,
                                             hcoll_module->hcoll_context, (void**) request);
        if (HCOLL_SUCCESS == rc) {
            return rc;
        }
        HCOL_VERBOSE(20,"RUNNING FALLBACK IGATHERV");
    } else {
        /* At least one datatype has no hcoll representation (not a simple
         * predefined datatype); a richer mapping may be added in the future. */
        HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback igatherv;",
                     sdtype->super.name,
                     rdtype->super.name);
    }

    /* shared fallback path */
    rc = hcoll_module->previous_igatherv(sbuf, scount, sdtype,
                                         rbuf, rcounts, displs, rdtype, root,
                                         comm, request,
                                         hcoll_module->previous_igatherv_module);
    return rc;
}
Esempio n. 3
0
/* Compute how many bytes, at most max_len, the given convertor could pack
 * from its current position.  The answer can be smaller than max_len: e.g. a
 * 1000 byte max_len may only yield 998 bytes when an indivisible convertor
 * element would straddle the 1000 byte boundary.
 *
 * The convertor itself is left untouched; the probing is done on a clone
 * that is positioned via opal_convertor_clone_with_position().
 *
 * Aborts the process if cloning/positioning reports an error (rc < 0).
 */
size_t opal_btl_usnic_convertor_pack_peek(
    const opal_convertor_t *conv,
    size_t max_len)
{
    opal_convertor_t clone;
    size_t target_pos;
    size_t reachable;
    int status;

    OBJ_CONSTRUCT(&clone, opal_convertor_t);

    /* ask for a position max_len bytes ahead; the call updates target_pos */
    target_pos = conv->bConverted + max_len;
    status = opal_convertor_clone_with_position(conv, &clone, 1, &target_pos);
    if (OPAL_UNLIKELY(status < 0)) {
        BTL_ERROR(("unexpected convertor error"));
        abort(); /* XXX */
    }

    assert(target_pos >= conv->bConverted);
    reachable = target_pos - conv->bConverted;

    OBJ_DESTRUCT(&clone);

    return reachable;
}
Esempio n. 4
0
/**
 * Tear down the GNI endpoint held by an endpoint handle.
 *
 * A handle whose gni_handle is 0 is left alone. Otherwise the endpoint is
 * unbound and, only if unbinding succeeded, destroyed. In either case the
 * stored gni_handle is reset to 0.
 *
 * @return always OPAL_SUCCESS
 */
int mca_btl_ugni_ep_handle_cleanup (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
    if (0 != ep_handle->gni_handle) {
        /* TODO: need to fix, may be outstanding tx's, etc. */
        if (OPAL_LIKELY(GNI_RC_SUCCESS == GNI_EpUnbind (ep_handle->gni_handle))) {
            (void) GNI_EpDestroy (ep_handle->gni_handle);
        }
        /* NOTE(review): an unbind failure is silent here (the original author
         * marked it "should warn") and the endpoint is then not destroyed. */

        ep_handle->gni_handle = 0;
    }

    return OPAL_SUCCESS;
}
/**
 * Compute the component's SMSG parameters.
 *
 * If ugni_smsg_limit is 0 it is chosen from the number of procs reported by
 * ompi_proc_world(). smsg_max_data is then the limit minus the size of the
 * send fragment header, and smsg_mbox_size is the size reported by
 * GNI_SmsgBufferSizeNeeded() passed through OPAL_ALIGN(..., 64, ...).
 *
 * @return OMPI_SUCCESS, or the converted GNI error if the size query fails
 */
static int mca_btl_ugni_smsg_setup (void) {
    gni_smsg_attr_t attr;
    unsigned int raw_mbox_size;
    size_t world_size;
    gni_return_t grc;

    /* NOTE(review): only the count is used; the value returned by
     * ompi_proc_world() is discarded. If that is a caller-owned array it is
     * leaked here - confirm against the ompi_proc_world() contract. */
    (void) ompi_proc_world (&world_size);

    if (0 == mca_btl_ugni_component.ugni_smsg_limit) {
        /* auto-set the smsg limit based on the number of ranks */
        mca_btl_ugni_component.ugni_smsg_limit =
            (world_size <= 512)   ? 8192 :
            (world_size <= 1024)  ? 2048 :
            (world_size <= 8192)  ? 1024 :
            (world_size <= 16384) ? 512  : 256;
    }

    mca_btl_ugni_component.smsg_max_data =
        mca_btl_ugni_component.ugni_smsg_limit - sizeof (mca_btl_ugni_send_frag_hdr_t);

    /* describe the mailbox so uGNI can tell us how large it must be */
    attr.msg_type       = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
    attr.msg_maxsize    = mca_btl_ugni_component.ugni_smsg_limit;
    attr.mbox_maxcredit = mca_btl_ugni_component.smsg_max_credits;

    grc = GNI_SmsgBufferSizeNeeded (&attr, &raw_mbox_size);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
        BTL_ERROR(("error in GNI_SmsgBufferSizeNeeded"));
        return ompi_common_rc_ugni_to_ompi (grc);
    }

    mca_btl_ugni_component.smsg_mbox_size = OPAL_ALIGN(raw_mbox_size, 64, unsigned int);

    return OMPI_SUCCESS;
}
/**
 * pml_enable wrapper that brackets the wrapped PML's enable call with the
 * CRCP pml_enable hook (PRE and POST phases).
 *
 * When no CRCP hook is installed the wrapped PML is called directly.
 * Otherwise: the hook runs with state PRE; unless it switched the state to
 * OMPI_CRCP_PML_SKIP the wrapped pml_enable runs; then the hook runs again
 * with state POST. The first failure ends the sequence.
 *
 * @param enable  forwarded unchanged to the hook and the wrapped PML
 * @return OMPI_SUCCESS, the hook's error_code, or the wrapped PML's error
 */
int mca_pml_crcpw_enable(bool enable)
{
    int ret;
    ompi_crcp_base_pml_state_t * pml_state = NULL;

    if( OPAL_UNLIKELY(NULL == ompi_crcp.pml_enable) ) {
        return mca_pml_crcpw_module.wrapped_pml_module.pml_enable(enable);
    }

    PML_CRCP_STATE_ALLOC(pml_state, ret);

    pml_state->wrapped_pml_component = &(mca_pml_crcpw_module.wrapped_pml_component);
    pml_state->wrapped_pml_module    = &(mca_pml_crcpw_module.wrapped_pml_module);

    /* PRE phase */
    pml_state->state = OMPI_CRCP_PML_PRE;
    pml_state = ompi_crcp.pml_enable(enable, pml_state);
    ret = pml_state->error_code;
    if( OMPI_SUCCESS != ret ) {
        goto release_state;
    }

    /* the hook may ask us to skip the wrapped call */
    if( OMPI_CRCP_PML_SKIP != pml_state->state ) {
        ret = mca_pml_crcpw_module.wrapped_pml_module.pml_enable(enable);
        if( OMPI_SUCCESS != ret ) {
            goto release_state;
        }
    }

    /* POST phase; its error_code (OMPI_SUCCESS or not) is the final result */
    pml_state->state = OMPI_CRCP_PML_POST;
    pml_state = ompi_crcp.pml_enable(enable, pml_state);
    ret = pml_state->error_code;

release_state:
    PML_CRCP_STATE_RETURN(pml_state);

    return ret;
}
Esempio n. 7
0
/**
 * Start a send request whose buffer is a CUDA buffer.
 *
 * With OPAL_CUDA_SUPPORT_41: the CONVERTOR_CUDA flag is cleared while asking
 * whether the convertor needs buffers, and set again afterwards. If no
 * buffers are needed, an RDMA start is attempted when
 * mca_pml_bfo_rdma_cuda_btls() reports at least one BTL, otherwise a
 * rendezvous with `size` initial bytes is started. If buffers are needed, a
 * rendezvous carrying no initial data is started.
 *
 * Without OPAL_CUDA_SUPPORT_41: always a rendezvous with no initial data.
 *
 * @return the return code of the start routine that was invoked
 */
int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
                                        mca_bml_base_btl_t* bml_btl,
                                        size_t size) {
    int rc;
#if OPAL_CUDA_SUPPORT_41
    sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
    if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor)) {
        /* Do not send anything with first rendezvous message as copying GPU
         * memory into RNDV message is expensive. */
        sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
        rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
    } else {
        unsigned char *base;

        opal_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base );
        /* restore the flag that was cleared for the need_buffers query */
        sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;

        sendreq->req_rdma_cnt = (uint32_t)mca_pml_bfo_rdma_cuda_btls(
                                    sendreq->req_endpoint,
                                    base,
                                    sendreq->req_send.req_bytes_packed,
                                    sendreq->req_rdma);
        if( 0 != sendreq->req_rdma_cnt ) {
            rc = mca_pml_bfo_send_request_start_rdma(sendreq, bml_btl,
                                                     sendreq->req_send.req_bytes_packed);
            if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
                mca_pml_bfo_free_rdma_resources(sendreq);
            }
        } else {
            /* the CONTIG header flag is only passed for BTLs flagged CUDA_PUT */
            rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size,
                     (bml_btl->btl_flags & MCA_BTL_FLAGS_CUDA_PUT)
                         ? MCA_PML_BFO_HDR_FLAGS_CONTIG : 0);
        }
    }
#else
    /* Just do the rendezvous but set initial data to be sent to zero */
    rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
#endif /* OPAL_CUDA_SUPPORT_41 */
    return rc;
}
Esempio n. 8
0
/**
 * Drain the endpoint's list of fragments waiting to be sent.
 *
 * Fragments are removed from the front of frag_wait_list and sent one by
 * one. On the first negative return code the loop stops: for
 * OMPI_ERR_OUT_OF_RESOURCE the fragment is put back at the front of the
 * list, for any other error the fragment is completed with that error.
 *
 * @return OMPI_SUCCESS when the list was emptied, otherwise the failing
 *         return code of mca_btl_ugni_send_frag()
 */
int mca_btl_ugni_progress_send_wait_list (mca_btl_base_endpoint_t *endpoint)
{
    mca_btl_ugni_base_frag_t *pending;
    int status;

    for (;;) {
        pending = (mca_btl_ugni_base_frag_t *) opal_list_remove_first (&endpoint->frag_wait_list);
        if (NULL == pending) {
            break;
        }

        status = mca_btl_ugni_send_frag (endpoint, pending);
        if (OPAL_LIKELY(OMPI_SUCCESS <= status)) {
            continue;
        }

        if (OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE == status)) {
            /* retry later: keep the fragment at the head of the queue */
            opal_list_prepend (&endpoint->frag_wait_list, (opal_list_item_t *) pending);
        } else {
            mca_btl_ugni_frag_complete (pending, status);
        }

        return status;
    }

    return OMPI_SUCCESS;
}
Esempio n. 9
0
/* Return the context this caller should use for communication.
 *
 * With thread-local storage available, the chosen context is cached in
 * my_context; the first call picks btl->contexts[cur] and advances a shared
 * counter modulo btl->num_contexts while holding btl->module_lock.
 * Without TLS, the choice is delegated to get_ofi_context_rr(). */
mca_btl_ofi_context_t *get_ofi_context(mca_btl_ofi_module_t *btl)
{
#if OPAL_HAVE_THREAD_LOCAL
    /* index of the context handed to the next caller without a cached one */
    static volatile int64_t next_slot = 0;

    if (OPAL_LIKELY(NULL != my_context)) {
        return my_context;
    }

    OPAL_THREAD_LOCK(&btl->module_lock);

    my_context = &btl->contexts[next_slot];
    next_slot = (next_slot + 1) %btl->num_contexts;

    OPAL_THREAD_UNLOCK(&btl->module_lock);

    assert (my_context);
    return my_context;
#else
    return get_ofi_context_rr(btl);
#endif
}
Esempio n. 10
0
/**
 * Blocking allgather through hcoll, with fallback to the previous module.
 *
 * Both datatypes are mapped to hcoll representations (for MPI_IN_PLACE the
 * send representation is taken from the receive one). If either
 * representation is "zero", or hcoll does not return HCOLL_SUCCESS, the
 * call is forwarded unchanged to previous_allgather.
 *
 * @return HCOLL_SUCCESS's value when hcoll performed the operation,
 *         otherwise the return code of the fallback allgather
 */
int mca_coll_hcoll_allgather(const void *sbuf, int scount,
                            struct ompi_datatype_t *sdtype,
                            void *rbuf, int rcount,
                            struct ompi_datatype_t *rdtype,
                            struct ompi_communicator_t *comm,
                            mca_coll_base_module_t *module)
{
    mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module;
    dte_data_representation_t stype;
    dte_data_representation_t rtype;
    int rc;

    HCOL_VERBOSE(20,"RUNNING HCOL ALLGATHER");

    stype = ompi_dtype_2_hcoll_dtype(sdtype, TRY_FIND_DERIVED);
    rtype = ompi_dtype_2_hcoll_dtype(rdtype, TRY_FIND_DERIVED);
    if (MPI_IN_PLACE == sbuf) {
        stype = rtype;
    }

    if (OPAL_LIKELY(!HCOL_DTE_IS_ZERO(stype) && !HCOL_DTE_IS_ZERO(rtype))) {
        rc = hcoll_collectives.coll_allgather((void *)sbuf, scount, stype,
                                              rbuf, rcount, rtype,
                                              hcoll_module->hcoll_context);
        if (HCOLL_SUCCESS == rc) {
            return rc;
        }
        HCOL_VERBOSE(20,"RUNNING FALLBACK ALLGATHER");
    } else {
        /* At least one datatype has no hcoll representation (not a simple
         * predefined datatype); a richer mapping may be added in the future. */
        HCOL_VERBOSE(20,"Ompi_datatype is not supported: sdtype = %s, rdtype = %s; calling fallback allgather;",
                     sdtype->super.name,
                     rdtype->super.name);
    }

    /* shared fallback path */
    rc = hcoll_module->previous_allgather(sbuf, scount, sdtype,
                                          rbuf, rcount, rdtype,
                                          comm,
                                          hcoll_module->previous_allgather_module);
    return rc;
}
Esempio n. 11
0
/**
 * Set up the short-message receive blocks.
 *
 * Constructs the short-block mutex and list, then allocates
 * recv_short_num blocks; each one is appended to recv_short_blocks and
 * activated.
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE as soon as a block
 *         allocation returns NULL (blocks created so far stay on the list)
 */
int
ompi_mtl_portals4_recv_short_init(void)
{
    ompi_mtl_portals4_recv_short_block_t *blk;
    int n;

    OBJ_CONSTRUCT(&ompi_mtl_portals4.short_block_mutex, opal_mutex_t);
    OBJ_CONSTRUCT(&(ompi_mtl_portals4.recv_short_blocks), opal_list_t);

    for (n = 0 ; n < ompi_mtl_portals4.recv_short_num ; ++n) {
        blk = ompi_mtl_portals4_recv_short_block_alloc(false);
        if (OPAL_UNLIKELY(NULL == blk)) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* track the block first, then activate it */
        opal_list_append(&ompi_mtl_portals4.recv_short_blocks, &blk->base);
        ompi_mtl_portals4_activate_block(blk);
    }

    return OMPI_SUCCESS;
}
Esempio n. 12
0
/**
 * Put implemented on top of an xpmem registration.
 *
 * Looks up a registration covering [remote_address, remote_address + size),
 * copies size bytes from local_address to the resulting remote pointer,
 * returns the registration and then invokes the completion callback with
 * OPAL_SUCCESS.
 *
 * @return OPAL_SUCCESS, or OPAL_ERROR if no registration could be obtained
 *         (the callback is not invoked in that case)
 */
int mca_btl_vader_put_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
                             uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                             mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                             int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    void *mapped_dst;
    mca_rcache_base_registration_t *registration =
        vader_get_registation (endpoint, (void *)(intptr_t) remote_address, size, 0, &mapped_dst);

    if (OPAL_UNLIKELY(NULL == registration)) {
        return OPAL_ERROR;
    }

    vader_memmove (mapped_dst, local_address, size);
    vader_return_registration (registration, endpoint);

    /* the callback is invoked unconditionally once the copy is done */
    cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);

    return OPAL_SUCCESS;
}
Esempio n. 13
0
File: nbc.c Progetto: ICLDisco/ompi
/* Start a non-blocking collective described by handle.
 *
 * The empty request is accepted and ignored. Otherwise the request is
 * marked active with a clean error status, the first round is started and,
 * if that succeeds, the request is appended to the component's list of
 * active requests under the component lock.
 *
 * Returns OMPI_SUCCESS or the error reported by NBC_Start_round(). */
int NBC_Start(NBC_Handle *handle) {
  int rc;

  /* bozo case */
  if (&ompi_request_empty == (ompi_request_t *)handle) {
    return OMPI_SUCCESS;
  }

  /* kick off first round */
  handle->super.req_state = OMPI_REQUEST_ACTIVE;
  handle->super.req_status.MPI_ERROR = OMPI_SUCCESS;

  rc = NBC_Start_round(handle);
  if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
    /* register the request so it is found on the active list */
    OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock);
    opal_list_append(&mca_coll_libnbc_component.active_requests, &(handle->super.super.super));
    OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock);

    return OMPI_SUCCESS;
  }

  return rc;
}
Esempio n. 14
0
/**
 * Common implementation behind the (r)get_accumulate/accumulate entry points.
 *
 * Nothing is transferred when target_count is 0, or when a result buffer is
 * given with result_count 0; in that case a supplied request is completed
 * immediately. Otherwise the remote segment for the target range is looked
 * up and the operation is dispatched to the local variant when
 * ompi_osc_rdma_peer_local_base(peer) is true, or to the master variant
 * otherwise.
 *
 * @return OMPI_SUCCESS, the lookup error, or the dispatched routine's result
 */
static inline
int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const void *origin_addr, int origin_count,
                                            struct ompi_datatype_t *origin_datatype, void *result_addr, int result_count,
                                            struct ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer,
                                            int target_rank, MPI_Aint target_disp, int target_count,
                                            struct ompi_datatype_t *target_datatype, struct ompi_op_t *op,
                                            ompi_osc_rdma_request_t *request)
{
    ompi_osc_rdma_module_t *module = sync->module;
    mca_btl_base_registration_handle_t *remote_handle;
    uint64_t remote_addr;
    int rc;
    /* short-circuit case. note that origin_count may be 0 if op is MPI_NO_OP */
    const int nothing_to_do = (result_addr && 0 == result_count) || 0 == target_count;

    if (nothing_to_do) {
        if (request) {
            ompi_osc_rdma_request_complete (request, MPI_SUCCESS);
        }

        return OMPI_SUCCESS;
    }

    rc = osc_rdma_get_remote_segment (module, peer, target_disp, target_datatype->super.size * target_count,
                                      &remote_addr, &remote_handle);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        return rc;
    }

    if (ompi_osc_rdma_peer_local_base (peer)) {
        /* local/self optimization */
        rc = ompi_osc_rdma_gacc_local (origin_addr, origin_count, origin_datatype, result_addr, result_count,
                                       result_datatype, peer, remote_addr, remote_handle, target_count,
                                       target_datatype, op, module, request);
    } else {
        rc = ompi_osc_rdma_gacc_master (sync, origin_addr, origin_count, origin_datatype, result_addr, result_count,
                                        result_datatype, peer, remote_addr, remote_handle, target_count,
                                        target_datatype, op, request);
    }

    return rc;
}
Esempio n. 15
0
/*
 * Create a "heavy" receive request for the given buffer/source/tag and hand
 * it back through *request. The request is only allocated and initialised
 * here; nothing in this function starts the receive.
 *
 * Returns OMPI_SUCCESS, or the value MCA_PML_CM_HVY_RECV_REQUEST_ALLOC left
 * in ret when it is not OMPI_SUCCESS (in which case *request is untouched).
 */
int
mca_pml_cm_irecv_init(void *addr,
                      size_t count,
                      ompi_datatype_t * datatype,
                      int src,
                      int tag,
                      struct ompi_communicator_t *comm,
                      struct ompi_request_t **request)
{
    int ret;
    mca_pml_cm_hvy_recv_request_t *recvreq;
    /* not initialised here; handed to the INIT macro below, which presumably
     * assigns it - TODO confirm against the macro definition */
    ompi_proc_t* ompi_proc;
    
    /* the macro reports its outcome through ret */
    MCA_PML_CM_HVY_RECV_REQUEST_ALLOC(recvreq, ret);
    if( OPAL_UNLIKELY(OMPI_SUCCESS != ret) ) return ret;
    
    /* NOTE(review): the trailing `true` looks like a "persistent" flag given
     * the _init name - confirm against the macro's parameter list */
    MCA_PML_CM_HVY_RECV_REQUEST_INIT(recvreq, ompi_proc, comm, tag, src, 
                                     datatype, addr, count, true); 
    
    *request = (ompi_request_t*) recvreq;

    return OMPI_SUCCESS;
}
Esempio n. 16
0
/**
 * Hierarchical non-blocking barrier.
 *
 * Launches the barrier through mca_coll_ml_barrier_launch() and reports the
 * outcome. The call counter only exists when OPAL_ENABLE_DEBUG is set and is
 * only referenced from the ML_VERBOSE arguments.
 *
 * @return OMPI_SUCCESS, or the error returned by the launch routine
 */
int mca_coll_ml_ibarrier_intra(struct ompi_communicator_t *comm,
                               ompi_request_t **req,
                               mca_coll_base_module_t *module)
{
    mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;
    int rc;

#if OPAL_ENABLE_DEBUG
    static int barriers_count = 0;
#endif

    ML_VERBOSE(10, ("IBarrier num %d start.", ++barriers_count));

    rc = mca_coll_ml_barrier_launch(ml_module, req);
    if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
        ML_VERBOSE(10, ("IBarrier num %d was done.", barriers_count));

        return OMPI_SUCCESS;
    }

    ML_ERROR(("Failed to launch a barrier."));
    return rc;
}
Esempio n. 17
0
/**
 * Get from the local window (target rank is the calling process).
 *
 * The source address is module->baseptr advanced by
 * source_disp * module->disp_unit. After waiting on the sync object, the
 * data is copied with ompi_datatype_sndrcv() and, on success, a supplied
 * request is completed with MPI_SUCCESS.
 *
 * @return OMPI_SUCCESS or the error reported by ompi_datatype_sndrcv()
 */
static inline int ompi_osc_pt2pt_get_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, void *target, int target_count, ompi_datatype_t *target_datatype,
                                           OPAL_PTRDIFF_TYPE source_disp, int source_count, ompi_datatype_t *source_datatype,
                                           ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request)
{
    void *window_src = (unsigned char*) module->baseptr +
        ((unsigned long) source_disp * module->disp_unit);
    int rc;

    /* if we are in active target mode wait until all post messages arrive */
    ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync);

    rc = ompi_datatype_sndrcv (window_src, source_count, source_datatype,
                               target, target_count, target_datatype);
    if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
        if (request) {
            ompi_osc_pt2pt_request_complete (request, MPI_SUCCESS);
        }

        return OMPI_SUCCESS;
    }

    return rc;
}
Esempio n. 18
0
/**
 * Non-blocking allgather entry point.
 *
 * Thin wrapper that forwards all arguments to mca_coll_ml_allgather_start()
 * and logs before and (on success) after the call.
 *
 * @return the return code of mca_coll_ml_allgather_start()
 */
int mca_coll_ml_allgather_nb(const void *sbuf, int scount,
                             struct ompi_datatype_t *sdtype,
                             void* rbuf, int rcount,
                             struct ompi_datatype_t *rdtype,
                             struct ompi_communicator_t *comm,
                             ompi_request_t **req,
                             mca_coll_base_module_t *module)
{
    int rc;

    ML_VERBOSE(10, ("Starting non-blocking allgather"));

    rc = mca_coll_ml_allgather_start (sbuf, scount, sdtype,
                                      rbuf, rcount, rdtype,
                                      comm, module, req);
    if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
        ML_VERBOSE(10, ("Non-blocking allgather started"));
    }

    return rc;
}
Esempio n. 19
0
/**
 * Get implemented by sending an RDMA fragment (MCA_BTL_VADER_OP_GET) with
 * tag MCA_BTL_TAG_VADER to the peer.
 *
 * @return OPAL_ERR_NOT_AVAILABLE if size exceeds the BTL's btl_get_limit,
 *         OPAL_ERR_OUT_OF_RESOURCE if no fragment could be allocated,
 *         OPAL_SUCCESS once the fragment has been handed to the send path
 */
int mca_btl_vader_get_sc_emu (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
                              uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                              mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                              int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    mca_btl_vader_frag_t *get_frag;

    if (mca_btl_vader.super.btl_get_limit < size) {
        return OPAL_ERR_NOT_AVAILABLE;
    }

    get_frag = mca_btl_vader_rdma_frag_alloc (btl, endpoint, MCA_BTL_VADER_OP_GET, 0, 0, 0, order, flags, size,
                                              local_address, remote_address, cbfunc, cbcontext, cbdata,
                                              mca_btl_vader_sc_emu_get_complete);
    if (OPAL_LIKELY(NULL != get_frag)) {
        /* send is always successful, so its return value is not inspected */
        (void) mca_btl_vader_send (btl, endpoint, &get_frag->base, MCA_BTL_TAG_VADER);

        return OPAL_SUCCESS;
    }

    return OPAL_ERR_OUT_OF_RESOURCE;
}
Esempio n. 20
0
/**
 * Accumulate entry point of the RDMA one-sided component.
 *
 * Looks up the synchronization object (and peer) for target_rank and
 * forwards to ompi_osc_rdma_rget_accumulate_internal() with no result
 * buffer (NULL, 0, NULL) and no request (NULL).
 *
 * @return OMPI_ERR_RMA_SYNC when no sync object is found for the target,
 *         otherwise the result of the internal routine
 */
int ompi_osc_rdma_accumulate (const void *origin_addr, int origin_count,
                              struct ompi_datatype_t *origin_datatype, int target_rank,
                              OPAL_PTRDIFF_TYPE target_disp, int target_count,
                              struct ompi_datatype_t *target_datatype, struct ompi_op_t *op,
                              struct ompi_win_t *win)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    ompi_osc_rdma_peer_t *target_peer;
    ompi_osc_rdma_sync_t *target_sync =
        ompi_osc_rdma_module_sync_lookup (module, target_rank, &target_peer);

    if (OPAL_UNLIKELY(NULL == target_sync)) {
        return OMPI_ERR_RMA_SYNC;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "acc: 0x%lx, %d, %s, %d, 0x%lx, %d, %s, %s, %s",
                         (unsigned long) origin_addr, origin_count, origin_datatype->name, target_rank,
                         (unsigned long) target_disp, target_count, target_datatype->name, op->o_name, win->w_name));

    /* plain accumulate: no result buffer, no request */
    return ompi_osc_rdma_rget_accumulate_internal (target_sync, origin_addr, origin_count, origin_datatype,
                                                   NULL, 0, NULL,
                                                   target_peer, target_rank, target_disp, target_count,
                                                   target_datatype, op, NULL);
}
Esempio n. 21
0
/**
 * Look up registrations overlapping [addr, addr + size - 1] in the VMA tree.
 *
 * Before searching, if opal_memory_changed() reports a change and a
 * memoryc_process hook is installed, the hook is run; a failure of the hook
 * aborts the lookup.
 *
 * @return OPAL_ERROR for size == 0, the hook's error code if it fails,
 *         otherwise the result of mca_rcache_base_vma_tree_find_all()
 */
int mca_rcache_base_vma_find_all (mca_rcache_base_vma_module_t *vma_module, void *addr,
                                  size_t size, mca_rcache_base_registration_t **regs,
                                  int reg_cnt)
{
    unsigned char *last_byte;
    int rc;

    if (0 == size) {
        return OPAL_ERROR;
    }

    /* inclusive upper bound of the requested range */
    last_byte = (unsigned char *) ((intptr_t) addr + size - 1);

    /* Check to ensure that the cache is valid */
    if (OPAL_UNLIKELY(opal_memory_changed() &&
                      NULL != opal_memory->memoryc_process)) {
        rc = opal_memory->memoryc_process();
        if (OPAL_SUCCESS != rc) {
            return rc;
        }
    }

    return mca_rcache_base_vma_tree_find_all (vma_module, (unsigned char *) addr,
                                              last_byte, regs, reg_cnt);
}
Esempio n. 22
0
/*
 * Create an IDENTICAL copy of a convertor. In this context IDENTICAL means
 * that the datatype, count and all other properties of the source convertor
 * are replicated on the destination. The references to the datatype are NOT
 * increased.
 *
 * Special care is taken with the stack. In all cases the destination stack
 * has the same number of entries as the source stack (heap allocated when it
 * exceeds DT_STATIC_STACK_SIZE, otherwise the destination's embedded
 * static_stack). If copy_stack is true (!= 0) the used part of the source
 * stack is copied, giving a convertor ready to continue from the source's
 * position. If copy_stack is false the destination is left with an empty
 * stack (bConverted and stack_pos set to -1) and must be positioned with
 * opal_convertor_set_position before use.
 *
 * Returns OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE when the stack cannot be
 * allocated; in that case the destination has not been modified.
 */
int opal_convertor_clone( const opal_convertor_t* source,
                          opal_convertor_t* destination,
                          int32_t copy_stack )
{
    dt_stack_t* stack;

    /* Obtain the stack storage first so that an allocation failure is
     * reported before the destination is touched (the result of malloc was
     * previously used unchecked, including as a memcpy target). */
    if( OPAL_UNLIKELY(source->stack_size > DT_STATIC_STACK_SIZE) ) {
        stack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * source->stack_size );
        if( OPAL_UNLIKELY(NULL == stack) ) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
    } else {
        stack = destination->static_stack;
    }

    destination->remoteArch        = source->remoteArch;
    destination->flags             = source->flags;
    destination->pDesc             = source->pDesc;
    destination->use_desc          = source->use_desc;
    destination->count             = source->count;
    destination->pBaseBuf          = source->pBaseBuf;
    destination->fAdvance          = source->fAdvance;
    destination->master            = source->master;
    destination->local_size        = source->local_size;
    destination->remote_size       = source->remote_size;

    /* install the stack */
    destination->pStack     = stack;
    destination->stack_size = source->stack_size;

    /* initialize the stack */
    if( OPAL_LIKELY(0 == copy_stack) ) {
        destination->bConverted = -1;
        destination->stack_pos  = -1;
    } else {
        /* only entries [0, stack_pos] of the source stack are in use */
        memcpy( destination->pStack, source->pStack, sizeof(dt_stack_t) * (source->stack_pos+1) );
        destination->bConverted = source->bConverted;
        destination->stack_pos  = source->stack_pos;
    }
#if OPAL_CUDA_SUPPORT
    destination->cbmemcpy   = source->cbmemcpy;
#endif
    return OPAL_SUCCESS;
}
Esempio n. 23
0
/**
 * Non-blocking send through UCX.
 *
 * Resolves the endpoint for (comm, dst) and posts a tagged non-blocking
 * send. A NULL result from ucp_tag_send_nb() is reported through the shared
 * completed_send_req; a non-error pointer is returned as the request.
 * The `mode` argument is only traced (see the TODO below).
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR when no endpoint is available or UCX
 *         reports an error (*request is not written in the error cases)
 */
int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype,
                      int dst, int tag, mca_pml_base_send_mode_t mode,
                      struct ompi_communicator_t* comm,
                      struct ompi_request_t **request)
{
    ompi_request_t *ucx_req;
    ucp_ep_h peer_ep;

    PML_UCX_TRACE_SEND("isend request *%p", buf, count, datatype, dst, tag, mode,
                       comm, (void*)request)

    /* TODO special care to sync/buffered send */

    peer_ep = mca_pml_ucx_get_ep(comm, dst);
    if (OPAL_UNLIKELY(NULL == peer_ep)) {
        PML_UCX_ERROR("Failed to get ep for rank %d", dst);
        return OMPI_ERROR;
    }

    ucx_req = (ompi_request_t*)ucp_tag_send_nb(peer_ep, buf, count,
                                               mca_pml_ucx_get_datatype(datatype),
                                               PML_UCX_MAKE_SEND_TAG(tag, comm),
                                               mca_pml_ucx_send_completion);

    if (NULL == ucx_req) {
        /* no request object was produced: hand out the shared completed one */
        PML_UCX_VERBOSE(8, "returning completed request");
        *request = &ompi_pml_ucx.completed_send_req;
        return OMPI_SUCCESS;
    }

    if (UCS_PTR_IS_ERR(ucx_req)) {
        PML_UCX_ERROR("ucx send failed: %s", ucs_status_string(UCS_PTR_STATUS(ucx_req)));
        return OMPI_ERROR;
    }

    PML_UCX_VERBOSE(8, "got request %p", (void*)ucx_req);
    *request = ucx_req;
    return OMPI_SUCCESS;
}
Esempio n. 24
0
/**
 * Allocate a descriptor with a single segment of the requested size.
 *
 * The fragment class is chosen by size: the RDMA class up to
 * MCA_BTL_SELF_MAX_INLINE_SIZE, the eager class up to btl_eager_limit, the
 * send class up to btl_max_send_size. Larger requests get no fragment.
 *
 * @param btl (IN)       BTL module
 * @param endpoint (IN)  not referenced in this function
 * @param order (IN)     not referenced in this function
 * @param size (IN)      Requested segment size.
 * @param flags (IN)     stored in the descriptor's des_flags
 *
 * @return the fragment's base descriptor, or NULL if no fragment was obtained
 */
static mca_btl_base_descriptor_t *mca_btl_self_alloc (struct mca_btl_base_module_t *btl,
                                                      struct mca_btl_base_endpoint_t *endpoint,
                                                      uint8_t order, size_t size, uint32_t flags)
{
    mca_btl_self_frag_t *new_frag = NULL;

    /* the first class whose limit covers the request wins */
    if (MCA_BTL_SELF_MAX_INLINE_SIZE >= size) {
        MCA_BTL_SELF_FRAG_ALLOC_RDMA(new_frag);
    } else if (mca_btl_self.btl_eager_limit >= size) {
        MCA_BTL_SELF_FRAG_ALLOC_EAGER(new_frag);
    } else if (btl->btl_max_send_size >= size) {
        MCA_BTL_SELF_FRAG_ALLOC_SEND(new_frag);
    }

    if( OPAL_LIKELY(NULL != new_frag) ) {
        new_frag->base.des_flags         = flags;
        new_frag->base.des_segment_count = 1;
        new_frag->segments[0].seg_len    = size;

        return &new_frag->base;
    }

    return NULL;
}
Esempio n. 25
0
/**
 * Completion callback for a put.
 *
 * Decrements the active-put counter, aborts on a failed status (there is no
 * way to propagate the error), and otherwise marks the put request complete,
 * frees it and releases the descriptor back to the BML BTL taken from
 * des->des_context.
 */
void mca_spml_yoda_put_completion(mca_btl_base_module_t* btl,
                                  struct mca_btl_base_endpoint_t* ep,
                                  struct mca_btl_base_descriptor_t* des,
                                  int status)
{
    mca_bml_base_btl_t* owner_btl = (mca_bml_base_btl_t*) des->des_context;
    mca_spml_yoda_rdma_frag_t* rdma_frag =
            (mca_spml_yoda_rdma_frag_t*) des->des_cbdata;
    mca_spml_yoda_put_request_t* put_request =
            (mca_spml_yoda_put_request_t*) rdma_frag->rdma_req;

    OPAL_THREAD_ADD32(&mca_spml_yoda.n_active_puts, -1);

    /* check completion status */
    if (OPAL_UNLIKELY(OSHMEM_SUCCESS != status)) {
        /* no way to propagate errors. die */
        SPML_ERROR("FATAL put completion error");
        oshmem_shmem_abort(-1);
    }

    /* finish and release the request, then give the descriptor back */
    put_request->req_put.req_base.req_spml_complete = true;
    oshmem_request_complete(&put_request->req_put.req_base.req_oshmem, 1);
    oshmem_request_free((oshmem_request_t**) &put_request);

    mca_bml_base_free(owner_btl, des);
}
Esempio n. 26
0
/**
 * Allocate a descriptor whose single source segment has the requested size.
 *
 * An eager fragment is used for sizes up to btl_eager_limit, a send fragment
 * for sizes up to btl_max_send_size; larger requests get no fragment.
 *
 * @param btl (IN)       BTL module
 * @param endpoint (IN)  not referenced in this function
 * @param order (IN)     not referenced in this function
 * @param size (IN)      Requested segment size.
 * @param flags (IN)     stored in the descriptor's des_flags
 *
 * @return the fragment viewed as a descriptor, or NULL if none was obtained
 */
mca_btl_base_descriptor_t* mca_btl_self_alloc(
        struct mca_btl_base_module_t* btl,
        struct mca_btl_base_endpoint_t* endpoint,
        uint8_t order,
        size_t size,
        uint32_t flags)
{
    mca_btl_self_frag_t* new_frag = NULL;

    if( mca_btl_self.btl_eager_limit >= size ) {
        MCA_BTL_SELF_FRAG_ALLOC_EAGER(new_frag);
    } else if( btl->btl_max_send_size >= size ) {
        MCA_BTL_SELF_FRAG_ALLOC_SEND(new_frag);
    }

    if( OPAL_LIKELY(NULL != new_frag) ) {
        /* the descriptor exposes the fragment's own segment as its source */
        new_frag->base.des_src     = &(new_frag->segment);
        new_frag->base.des_src_cnt = 1;
        new_frag->base.des_flags   = flags;
        new_frag->segment.seg_len  = size;

        return (mca_btl_base_descriptor_t*)new_frag;
    }

    return NULL;
}
Esempio n. 27
0
/**
 * Look up a registration covering [addr, addr + size - 1] in the VMA tree.
 *
 * Before searching, if opal_memory_changed() reports a change and a
 * memoryc_process hook is installed, the hook is run; a failure of the hook
 * aborts the lookup.
 *
 * @param rcache (IN)  rcache module, used as a mca_rcache_vma_module_t
 * @param addr (IN)    start of the range
 * @param size (IN)    length of the range in bytes; must not be 0
 * @param reg (OUT)    result of mca_rcache_vma_tree_find(); only written
 *                     when OMPI_SUCCESS is returned
 *
 * @return OMPI_ERROR for size == 0, the hook's error code if it fails,
 *         OMPI_SUCCESS otherwise
 */
int mca_rcache_vma_find(struct mca_rcache_base_module_t* rcache,
        void* addr, size_t size, mca_mpool_base_registration_t **reg)
{
    int rc;
    unsigned char* bound_addr;

    if(size == 0) {
        return OMPI_ERROR;
    }

    /* Inclusive upper bound. The arithmetic is done on unsigned char*
     * because arithmetic on void* is a compiler extension, not ISO C. */
    bound_addr = (unsigned char*) addr + size - 1;

    /* Check to ensure that the cache is valid */
    if (OPAL_UNLIKELY(opal_memory_changed() &&
                      NULL != opal_memory->memoryc_process &&
                      OPAL_SUCCESS != (rc = opal_memory->memoryc_process()))) {
        return rc;
    }

    *reg = mca_rcache_vma_tree_find((mca_rcache_vma_module_t*)rcache, (unsigned char*)addr,
            bound_addr);

    return OMPI_SUCCESS;
}
Esempio n. 28
0
/**
 * Initiate a synchronous get.
 *
 * Obtains a registration for the source segment, copies
 * min(dst->seg_len, src->seg_len) bytes from the resulting remote pointer
 * into the destination segment, returns the registration and completes the
 * fragment.
 *
 * @param btl (IN)         BTL module
 * @param endpoint (IN)    BTL addressing information
 * @param des (IN)         Description of the data to be transferred
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR if no registration / remote pointer
 *         could be obtained (the fragment is not completed in that case)
 */
int mca_btl_vader_get (struct mca_btl_base_module_t *btl,
                       struct mca_btl_base_endpoint_t *endpoint,
                       struct mca_btl_base_descriptor_t *des)
{
    mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
    mca_btl_base_segment_t *src = des->des_src;
    mca_btl_base_segment_t *dst = des->des_dst;
    const size_t size = min(dst->seg_len, src->seg_len);
    mca_mpool_base_registration_t *reg;
    /* initialised so the check below never reads an indeterminate value if
     * the lookup fails without writing the output pointer */
    void *rem_ptr = NULL;

    reg = vader_get_registation (endpoint, src->seg_addr.pval, src->seg_len, 0, &rem_ptr);
    /* a missing registration is a failure too, not just a NULL pointer */
    if (OPAL_UNLIKELY(NULL == reg || NULL == rem_ptr)) {
        return OMPI_ERROR;
    }

    vader_memmove (dst->seg_addr.pval, rem_ptr, size);

    vader_return_registration (reg, endpoint);

    mca_btl_vader_frag_complete (frag);

    return OMPI_SUCCESS;
}
Esempio n. 29
0
/**
 * Put size bytes from src_addr to dst_addr on PE dst.
 *
 * When the selected BTL is sm or vader and
 * mca_memheap_base_can_local_copy() permits it, the data is written with
 * a plain memcpy.  Otherwise the transfer is cut into nfrags fragments
 * (see calc_nfrags_put()); each fragment is staged into a buffer obtained
 * from mca_spml_yoda_bml_alloc() and submitted with mca_bml_base_put(),
 * or with mca_bml_base_send() when the BTL lacks MCA_BTL_FLAGS_PUT.
 *
 * @param dst_addr  destination address, resolved through the cached mkey
 *                  of PE dst
 * @param size      number of bytes to put; 0 returns immediately
 * @param src_addr  local source buffer
 * @param dst       destination PE
 * @param is_nb     not referenced by this routine
 *
 * @return OSHMEM_SUCCESS on success.  Every failure path sets
 *         OSHMEM_ERR_FATAL and calls oshmem_shmem_abort() with it before
 *         returning.
 */
static inline int mca_spml_yoda_put_internal(void *dst_addr,
                                             size_t size,
                                             void *src_addr,
                                             int dst,
                                             int is_nb)
{
    int rc = OSHMEM_SUCCESS;
    mca_spml_yoda_put_request_t *putreq = NULL;
    mca_bml_base_btl_t* bml_btl;
    mca_btl_base_descriptor_t* des = NULL;
    mca_btl_base_segment_t* segment;
    mca_spml_yoda_rdma_frag_t* frag;
    int nfrags;
    int i;
    unsigned ncopied = 0;
    unsigned int frag_size = 0;
    char *p_src, *p_dst;
    void* rva;
    sshmem_mkey_t *r_mkey;
    int btl_id = 0;
    struct yoda_btl *ybtl;
    int put_via_send;
    mca_btl_base_registration_handle_t *local_handle = NULL, *remote_handle = NULL;

    /* Nothing to put: that is fine. */
    if (0 >= size) {
        return OSHMEM_SUCCESS;
    }

    /* Find bml_btl and its global btl_id */
    bml_btl = get_next_btl(dst, &btl_id);
    if (!bml_btl) {
        /* report the PE that could not be reached, i.e. the destination */
        SPML_ERROR("cannot reach %d pe: no appropriate btl found", dst);
        rc = OSHMEM_ERR_FATAL;
        goto exit_fatal;
    }
    /* Check if btl has PUT method. If it doesn't - use SEND*/
    put_via_send = !(bml_btl->btl->btl_flags & MCA_BTL_FLAGS_PUT);

    /* Get rkey of remote PE (dst proc) which must be on memheap*/
    r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, btl_id, &rva);
    if (!r_mkey) {
        SPML_ERROR("pe=%d: %p is not address of shared variable",
                   dst, dst_addr);
        rc = OSHMEM_ERR_FATAL;
        goto exit_fatal;
    }

#if SPML_YODA_DEBUG == 1
    SPML_VERBOSE(100, "put: pe:%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s",
                 dst, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif

    ybtl = &mca_spml_yoda.btl_type_map[btl_id];

    /* BTLs that register memory carry the remote handle inside the mkey */
    if (ybtl->btl->btl_register_mem) {
        assert (r_mkey->len == ybtl->btl->btl_registration_handle_size);
        remote_handle = (mca_btl_base_registration_handle_t *) r_mkey->u.data;
    }

    /* Check whether we are putting into a shm attached segment and, if
     * so, just do a memcpy.
     */
    if ((YODA_BTL_SM == ybtl->btl_type || YODA_BTL_VADER == ybtl->btl_type)
            && mca_memheap_base_can_local_copy(r_mkey, dst_addr)) {
        memcpy((void *) (unsigned long) rva, src_addr, size);
        return OSHMEM_SUCCESS;
    }

    /* We support only blocking PUT now => we always need copy for src buffer*/
    calc_nfrags_put (bml_btl, size, &frag_size, &nfrags, put_via_send);

    p_src = (char*) src_addr;
    p_dst = (char*) (unsigned long) rva;
    for (i = 0; i < nfrags; i++) {
        /* Allocating send request from free list */
        putreq = mca_spml_yoda_putreq_alloc(dst);
        frag = &putreq->put_frag;
        /* every fragment is frag_size bytes except the last, which takes
         * whatever remains of the source buffer */
        ncopied = i < nfrags - 1 ? frag_size :(unsigned) ((char *) src_addr + size - p_src);

        /* Preparing source buffer */

        /* allocate buffer */
        mca_spml_yoda_bml_alloc(bml_btl,
                                &des,
                                MCA_BTL_NO_ORDER,
                                ncopied,
                                MCA_BTL_DES_SEND_ALWAYS_CALLBACK,
                                put_via_send);

        if (OPAL_UNLIKELY(!des || !des->des_segments )) {
            SPML_ERROR("src=%p nfrags = %d frag_size=%d",
                       src_addr, nfrags, frag_size);
            SPML_ERROR("shmem OOM error need %d bytes", ncopied);
            opal_show_help("help-oshmem-spml-yoda.txt",
                           "internal_oom_error",
                           true,
                           "Put", ncopied, mca_spml_yoda.bml_alloc_threshold);
            rc = OSHMEM_ERR_FATAL;
            goto exit_fatal;
        }

        /* copy data to allocated buffer*/
        segment = des->des_segments;
        spml_yoda_prepare_for_put((void*)segment->seg_addr.pval, ncopied,
                                  (void*)p_src, (void*)p_dst, put_via_send);

        /* the staging buffer has to be registered before an RDMA put */
        if (!put_via_send && ybtl->btl->btl_register_mem) {
            local_handle = ybtl->btl->btl_register_mem (ybtl->btl, bml_btl->btl_endpoint,
                                                        segment->seg_addr.pval, ncopied, 0);
            if (NULL == local_handle) {
                /* No free resources, Block on completion here */
                /* NOTE(review): putreq has not been submitted at this point
                 * and the put below still runs with a NULL local_handle --
                 * confirm this wait can actually return. */
                SPML_ERROR("shmem error: OSHMEM_ERR_OUT_OF_RESOURCE");
                oshmem_request_wait_completion(&putreq->req_put.req_base.req_oshmem);
            }
        }

        frag->rdma_segs[0].base_seg.seg_addr.lval = (uintptr_t) p_dst;
        frag->rdma_segs[0].base_seg.seg_len = (put_via_send ?
                                                   ncopied + SPML_YODA_SEND_CONTEXT_SIZE :
                                                   ncopied);
        frag->rdma_req = putreq;

        /* initialize callback data for put*/
        des->des_cbdata = frag;
        des->des_cbfunc = mca_spml_yoda_put_completion;

        OPAL_THREAD_ADD32(&mca_spml_yoda.n_active_puts, 1);
        /* put the data to remote side */
        if (!put_via_send) {
            rc = mca_bml_base_put (bml_btl, segment->seg_addr.pval, (uint64_t) (intptr_t) p_dst,
                                   local_handle, remote_handle, ncopied, 0, 0, mca_spml_yoda_put_completion_rdma,
                                   des);
        } else {
            rc = mca_bml_base_send(bml_btl, des, MCA_SPML_YODA_PUT);
            /* a return value of 1 from the send is treated as success */
            if (1 == rc)
                rc = OSHMEM_SUCCESS;
        }

        if (OPAL_UNLIKELY(OSHMEM_SUCCESS != rc)) {
            if (OSHMEM_ERR_OUT_OF_RESOURCE == rc) {
                /* No free resources, Block on completion here */
                SPML_ERROR("shmem error: OSHMEM_ERR_OUT_OF_RESOURCE");
                oshmem_request_wait_completion(&putreq->req_put.req_base.req_oshmem);
            } else {
                SPML_ERROR("shmem error");
            }
            /* exit with error */
            SPML_ERROR("shmem error: ret = %i, send_pe = %i, dest_pe = %i",
                       rc, oshmem_my_proc_id(), dst);
            rc = OSHMEM_ERR_FATAL;
            goto exit_fatal;
        }
        p_src += ncopied;
        p_dst += ncopied;
    }

    return rc;

exit_fatal:
    if (OSHMEM_SUCCESS != rc) {
        oshmem_shmem_abort(rc);
    }
    return rc;
}
Esempio n. 30
0
/**
 * Receive callback for a get request: unpack the request, stage the
 * requested data and send it back tagged MCA_SPML_YODA_GET_RESPONSE.
 *
 * The first segment of des is read as five values packed back to back:
 * source address (void*), size (size_t), PE the response goes to (int),
 * destination address (void*) and the get-request handle (void*).
 *
 * Allocation or send failures end in oshmem_shmem_abort(-1).
 *
 * @param btl     not referenced by this routine
 * @param tag     not referenced by this routine
 * @param des     descriptor whose first segment holds the packed request
 * @param cbdata  not referenced by this routine
 */
static void mca_yoda_get_callback(mca_btl_base_module_t* btl,
                                  mca_btl_base_tag_t tag,
                                  mca_btl_base_descriptor_t* des,
                                  void* cbdata )
{
    const char *cursor;
    void *src_addr;
    void *dst_addr;
    void *getreq;
    size_t size;
    int dst;
    mca_btl_base_descriptor_t* des_loc = NULL;
    int rc = OSHMEM_SUCCESS;
    mca_bml_base_btl_t* bml_btl;
    mca_spml_yoda_rdma_frag_t* frag;
    int btl_id = 0;
    mca_spml_yoda_put_request_t *putreq = NULL;

    /* Unpack data.  The fields are packed without padding, so the pointer
     * after the int is not pointer-aligned; memcpy each field out instead
     * of dereferencing casted pointers into the buffer. */
    cursor = (const char *) des->des_segments->seg_addr.pval;
    memcpy(&src_addr, cursor, sizeof(src_addr));
    cursor += sizeof(src_addr);
    memcpy(&size, cursor, sizeof(size));
    cursor += sizeof(size);
    memcpy(&dst, cursor, sizeof(dst));
    cursor += sizeof(dst);
    memcpy(&dst_addr, cursor, sizeof(dst_addr));
    cursor += sizeof(dst_addr);
    memcpy(&getreq, cursor, sizeof(getreq));

    /* Prepare put via send*/
    bml_btl = get_next_btl(dst, &btl_id);
    if (OPAL_UNLIKELY(!bml_btl)) {
        SPML_ERROR("cannot reach %d pe: no appropriate btl found", dst);
        oshmem_shmem_abort(-1);
        return;
    }

    putreq = mca_spml_yoda_putreq_alloc(dst);
    frag = &putreq->put_frag;

    mca_spml_yoda_bml_alloc(bml_btl,
                            &des_loc,
                            MCA_BTL_NO_ORDER,
                            size,
                            MCA_BTL_DES_SEND_ALWAYS_CALLBACK,
                            1);

    if (OPAL_UNLIKELY(!des_loc || !des_loc->des_segments)) {
        SPML_ERROR("shmem OOM error need %d bytes", (int)size);
        oshmem_shmem_abort(-1);
        return;
    }
    spml_yoda_prepare_for_get_response((void*)des_loc->des_segments->seg_addr.pval,
                                       size, src_addr, dst_addr, getreq, 1);

    frag->rdma_req = putreq;

    /* Initialize callback data for put*/
    des_loc->des_cbdata = frag;
    des_loc->des_cbfunc = mca_spml_yoda_put_completion;
    des_loc->des_segment_count = 1;

    OPAL_THREAD_ADD32(&mca_spml_yoda.n_active_puts, 1);

    /* Put via send*/
    rc = mca_bml_base_send(bml_btl, des_loc, MCA_SPML_YODA_GET_RESPONSE);
    /* a return value of 1 from the send is treated as success */
    if (1 == rc) {
        rc = OSHMEM_SUCCESS;
    }

    if (OPAL_UNLIKELY(OSHMEM_SUCCESS != rc)) {
        if (OSHMEM_ERR_OUT_OF_RESOURCE == rc) {
            /* No free resources, Block on completion here */
            SPML_ERROR("shmem error: OSHMEM_ERR_OUT_OF_RESOURCE");
            oshmem_request_wait_completion(&putreq->req_put.req_base.req_oshmem);
        } else {
            SPML_ERROR("shmem error");
        }
        /* exit with error */
        SPML_ERROR("shmem error: ret = %i, send_pe = %i, dest_pe = %i",
                   rc, oshmem_my_proc_id(), dst);
        oshmem_shmem_abort(-1);
    }
}