Example #1
File: ucp_ep.c Project: alex--m/ucx
static void ucp_ep_flush_progress(ucp_request_t *req)
{
    ucp_ep_h ep = req->send.ep;
    ucp_lane_index_t lane;
    ucs_status_t status;
    uct_ep_h uct_ep;

    ucs_trace("ep %p: progress flush req %p, lanes 0x%x count %d", ep, req,
              req->send.flush.lanes, req->send.uct_comp.count);

    while (req->send.flush.lanes) {

        /* Search for next lane to start flush */
        lane   = ucs_ffs64(req->send.flush.lanes);
        uct_ep = ep->uct_eps[lane];
        if (uct_ep == NULL) {
            req->send.flush.lanes &= ~UCS_BIT(lane);
            --req->send.uct_comp.count;
            continue;
        }

        /* Start flush operation on UCT endpoint */
        status = uct_ep_flush(uct_ep, 0, &req->send.uct_comp);
        ucs_trace("flushing ep %p lane[%d]: %s", ep, lane,
                  ucs_status_string(status));
        if (status == UCS_OK) {
            req->send.flush.lanes &= ~UCS_BIT(lane);
            --req->send.uct_comp.count;
        } else if (status == UCS_INPROGRESS) {
            req->send.flush.lanes &= ~UCS_BIT(lane);
        } else if (status == UCS_ERR_NO_RESOURCE) {
            if (req->send.lane != UCP_NULL_LANE) {
                ucs_trace("ep %p: not adding pending flush %p on lane %d, "
                          "because it's already pending on lane %d",
                          ep, req, lane, req->send.lane);
                break;
            }

            req->send.lane = lane;
            status = uct_ep_pending_add(uct_ep, &req->send.uct);
            ucs_trace("adding pending flush on ep %p lane[%d]: %s", ep, lane,
                      ucs_status_string(status));
            if (status == UCS_OK) {
                req->send.flush.lanes &= ~UCS_BIT(lane);
            } else if (status != UCS_ERR_BUSY) {
                ucp_ep_flush_error(req, status);
            }
        } else {
            ucp_ep_flush_error(req, status);
        }
    }
}
Example #2
void mca_spml_ucx_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
{
    spml_ucx_mkey_t   *ucx_mkey;
    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
    ucs_status_t err;
    
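    /* look up this peer's cached key slot for the given segment */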
    ucx_mkey = &ucx_ctx->ucp_peers[pe].mkeys[segno].key;

    err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn,
            mkey->u.data,
            &ucx_mkey->rkey); 
    if (UCS_OK != err) {
        SPML_UCX_ERROR("failed to unpack rkey: %s", ucs_status_string(err));
        goto error_fatal;
    }

    if (ucx_ctx == &mca_spml_ucx_ctx_default) {
        mkey->spml_context = ucx_mkey;
    }
    mca_spml_ucx_cache_mkey(ucx_ctx, mkey, segno, pe);
    return;

error_fatal:
    oshmem_shmem_abort(-1);
    return;
}
Example #3
void mca_spml_ucx_memuse_hook(void *addr, size_t length)
{
    int my_pe;
    spml_ucx_mkey_t *ucx_mkey;
    ucp_mem_advise_params_t params;
    ucs_status_t status;

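    /* advise only when non-blocking heap registration is enabled and addr
     * falls inside the heap segment */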
    if (!(mca_spml_ucx.heap_reg_nb && memheap_is_va_in_segment(addr, HEAP_SEG_INDEX))) {
        return;
    }

    my_pe    = oshmem_my_proc_id();
    ucx_mkey = &mca_spml_ucx_ctx_default.ucp_peers[my_pe].mkeys[HEAP_SEG_INDEX].key;

    params.field_mask = UCP_MEM_ADVISE_PARAM_FIELD_ADDRESS |
                        UCP_MEM_ADVISE_PARAM_FIELD_LENGTH |
                        UCP_MEM_ADVISE_PARAM_FIELD_ADVICE;

    params.address = addr;
    params.length  = length;
    params.advice  = UCP_MADV_WILLNEED;

    status = ucp_mem_advise(mca_spml_ucx.ucp_context, ucx_mkey->mem_h, &params);
    if (UCS_OK != status) {
        SPML_UCX_ERROR("ucp_mem_advise failed addr %p len %llu : %s",
                       addr, (unsigned long long)length, ucs_status_string(status));
    }
}
Example #4
size_t ucp_tag_rndv_rts_pack(void *dest, void *arg)
{
    ucp_request_t *sreq              = arg;   /* send request */
    ucp_rndv_rts_hdr_t *rndv_rts_hdr = dest;
    ucp_worker_h worker              = sreq->send.ep->worker;
    ssize_t packed_rkey_size;

    rndv_rts_hdr->super.tag        = sreq->send.tag.tag;
    rndv_rts_hdr->sreq.reqptr      = (uintptr_t)sreq;
    rndv_rts_hdr->sreq.sender_uuid = worker->uuid;
    rndv_rts_hdr->size             = sreq->send.length;

    /* Pack remote keys (which can be empty list) */
    if (UCP_DT_IS_CONTIG(sreq->send.datatype) &&
        ucp_rndv_is_get_zcopy(sreq, worker->context->config.ext.rndv_mode)) {
        /* pack rkey, ask target to do get_zcopy */
        rndv_rts_hdr->address = (uintptr_t)sreq->send.buffer;
        packed_rkey_size = ucp_rkey_pack_uct(worker->context,
                                             sreq->send.state.dt.dt.contig.md_map,
                                             sreq->send.state.dt.dt.contig.memh,
                                             rndv_rts_hdr + 1);
        if (packed_rkey_size < 0) {
            ucs_fatal("failed to pack rendezvous remote key: %s",
                      ucs_status_string(packed_rkey_size));
        }
    } else {
        rndv_rts_hdr->address = 0;
        packed_rkey_size      = 0;
    }

    return sizeof(*rndv_rts_hdr) + packed_rkey_size;
}
Example #5
static UCS_CLASS_CLEANUP_FUNC(uct_mm_ep_t)
{
    uct_mm_iface_t *iface = ucs_derived_of(self->super.super.iface, uct_mm_iface_t);
    ucs_status_t status;
    uct_mm_remote_seg_t *remote_seg;
    struct sglib_hashed_uct_mm_remote_seg_t_iterator iter;

    uct_mm_ep_signal_remote(self, UCT_MM_IFACE_SIGNAL_DISCONNECT);

    uct_worker_progress_unregister(iface->super.worker, uct_mm_iface_progress,
                                   iface);

    for (remote_seg = sglib_hashed_uct_mm_remote_seg_t_it_init(&iter, self->remote_segments_hash);
         remote_seg != NULL; remote_seg = sglib_hashed_uct_mm_remote_seg_t_it_next(&iter)) {
        sglib_hashed_uct_mm_remote_seg_t_delete(self->remote_segments_hash, remote_seg);
        /* detach the remote process's descriptors segment */
        status = uct_mm_md_mapper_ops(iface->super.md)->detach(remote_seg);
        if (status != UCS_OK) {
            ucs_warn("Unable to detach shared memory segment of descriptors: %s",
                     ucs_status_string(status));
        }
        ucs_free(remote_seg);
    }

    /* detach the remote process's shared memory segment (remote recv FIFO) */
    status = uct_mm_md_mapper_ops(iface->super.md)->detach(&self->mapped_desc);
    if (status != UCS_OK) {
        ucs_error("error detaching from remote FIFO");
    }

    uct_mm_ep_pending_purge(&self->super.super, NULL, NULL);
}
Example #6
File: mm_ep.c Project: brminich/ucx
ucs_arbiter_cb_result_t uct_mm_ep_process_pending(ucs_arbiter_t *arbiter,
                                                  ucs_arbiter_elem_t *elem,
                                                  void *arg)
{
    uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv);
    ucs_status_t status;
    uct_mm_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_mm_ep_t, arb_group);

    /* update the local tail with its actual value from the remote peer,
     * making sure that pending sends use the real tail value */
    ucs_memory_cpu_load_fence();
    ep->cached_tail = ep->fifo_ctl->tail;

    if (!uct_mm_ep_has_tx_resources(ep)) {
        return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
    }

    status = req->func(req);
    ucs_trace_data("progress pending request %p returned %s", req,
                   ucs_status_string(status));

    if (status == UCS_OK) {
        /* sent successfully. remove from the arbiter */
        return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
    } else if (status == UCS_INPROGRESS) {
        /* sent but not completed, keep in the arbiter */
        return UCS_ARBITER_CB_RESULT_NEXT_GROUP;
    } else {
        /* couldn't send. keep this request in the arbiter until the next time
         * this function is called */
        return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
    }
}
Example #7
int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src,
                     int tag, struct ompi_communicator_t* comm,
                     ompi_status_public_t* mpi_status)
{
    ucp_tag_t ucp_tag, ucp_tag_mask;
    ompi_request_t *req;

    PML_UCX_TRACE_RECV("%s", buf, count, datatype, src, tag, comm, "recv");

    PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm);
    req = (ompi_request_t*)ucp_tag_recv_nb(ompi_pml_ucx.ucp_worker, buf, count,
                                           mca_pml_ucx_get_datatype(datatype),
                                           ucp_tag, ucp_tag_mask,
                                           mca_pml_ucx_blocking_recv_completion);
    if (UCS_PTR_IS_ERR(req)) {
        PML_UCX_ERROR("ucx recv failed: %s", ucs_status_string(UCS_PTR_STATUS(req)));
        return OMPI_ERROR;
    }

    ucp_worker_progress(ompi_pml_ucx.ucp_worker);
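    /* poll the progress engines until the blocking receive completes */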
    while ( !REQUEST_COMPLETE(req) ) {
        opal_progress();
    }

    if (mpi_status != MPI_STATUS_IGNORE) {
        *mpi_status = req->req_status;
    }

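    /* reset the request state so it can be reused after release */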
    req->req_complete = REQUEST_PENDING;
    ucp_request_release(req);
    return OMPI_SUCCESS;
}
Example #8
ucs_status_t ucp_wireup_msg_progress(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_ep_h ep = req->send.ep;
    ssize_t packed_len;

    if (req->send.wireup.type == UCP_WIREUP_MSG_REQUEST) {
        if (ep->flags & UCP_EP_FLAG_REMOTE_CONNECTED) {
            ucs_trace("ep %p: not sending wireup message - remote already connected",
                      ep);
            goto out;
        }
    }

    /* send the active message */
    if (req->send.wireup.type == UCP_WIREUP_MSG_ACK) {
        req->send.lane = ucp_ep_get_am_lane(ep);
    } else {
        req->send.lane = ucp_ep_get_wireup_msg_lane(ep);
    }
    packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], UCP_AM_ID_WIREUP,
                                 ucp_wireup_msg_pack, req);
    if (packed_len < 0) {
        if (packed_len != UCS_ERR_NO_RESOURCE) {
            ucs_error("failed to send wireup: %s", ucs_status_string(packed_len));
        }
        return (ucs_status_t)packed_len;
    }

out:
    ucp_request_complete_send(req, UCS_OK);
    return UCS_OK;
}
Example #9
int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, int dst,
                     int tag, mca_pml_base_send_mode_t mode,
                     struct ompi_communicator_t* comm)
{
    ompi_request_t *req;
    ucp_ep_h ep;

    PML_UCX_TRACE_SEND("%s", buf, count, datatype, dst, tag, mode, comm, "send");

    /* TODO: special care for sync/buffered sends */

    ep = mca_pml_ucx_get_ep(comm, dst);
    if (OPAL_UNLIKELY(NULL == ep)) {
        PML_UCX_ERROR("Failed to get ep for rank %d", dst);
        return OMPI_ERROR;
    }

    req = (ompi_request_t*)ucp_tag_send_nb(ep, buf, count,
                                           mca_pml_ucx_get_datatype(datatype),
                                           PML_UCX_MAKE_SEND_TAG(tag, comm),
                                           mca_pml_ucx_send_completion);
    if (OPAL_LIKELY(req == NULL)) {
        return OMPI_SUCCESS;
    } else if (!UCS_PTR_IS_ERR(req)) {
        PML_UCX_VERBOSE(8, "got request %p", (void*)req);
        ucp_worker_progress(ompi_pml_ucx.ucp_worker);
        ompi_request_wait(&req, MPI_STATUS_IGNORE);
        return OMPI_SUCCESS;
    } else {
        PML_UCX_ERROR("ucx send failed: %s", ucs_status_string(UCS_PTR_STATUS(req)));
        return OMPI_ERROR;
    }
}
Example #10
ucs_arbiter_cb_result_t uct_ugni_ep_process_pending(ucs_arbiter_t *arbiter,
                                                    ucs_arbiter_elem_t *elem,
                                                    void *arg)
{
    uct_ugni_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_ugni_ep_t, arb_group);
    uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv);
    ucs_status_t rc;

    ep->arb_sched = 1;
    ucs_trace_data("progressing pending request %p", req);
    rc = req->func(req);
    ep->arb_sched = 0;
    ucs_trace_data("status returned from progress pending: %s",
                   ucs_status_string(rc));

    if (UCS_OK == rc) {
        /* sent successfully. remove from the arbiter */
        return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
    } else if (UCS_INPROGRESS == rc) {
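        /* sent but not completed, keep it in the arbiter */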
        return UCS_ARBITER_CB_RESULT_NEXT_GROUP;
    } else {
        /* couldn't send. keep this request in the arbiter until the next time
         * this function is called */
        return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
    }
}
Example #11
static ucs_status_t ucs_async_handler_dispatch(ucs_async_handler_t *handler)
{
    ucs_async_context_t *async;
    ucs_async_mode_t mode;
    ucs_status_t status;

    mode  = handler->mode;
    async = handler->async;
    if (async != NULL) {
        async->last_wakeup = ucs_get_time();
    }
    if (async == NULL) {
        ucs_trace_async("calling async handler " UCS_ASYNC_HANDLER_FMT,
                        UCS_ASYNC_HANDLER_ARG(handler));
        handler->cb(handler->id, handler->arg);
    } else if (ucs_async_method_call(mode, context_try_block, async)) {
        ucs_trace_async("calling async handler " UCS_ASYNC_HANDLER_FMT,
                        UCS_ASYNC_HANDLER_ARG(handler));
        handler->cb(handler->id, handler->arg);
        ucs_async_method_call(mode, context_unblock, async);
    } else /* async != NULL */ {
        ucs_trace_async("missed " UCS_ASYNC_HANDLER_FMT ", last_wakeup %lu",
                        UCS_ASYNC_HANDLER_ARG(handler), async->last_wakeup);
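        /* atomically mark the handler as missed; only the winner of the swap
         * enqueues it */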
        if (ucs_atomic_cswap32(&handler->missed, 0, 1) == 0) {
            status = ucs_mpmc_queue_push(&async->missed, handler->id);
            if (status != UCS_OK) {
                ucs_fatal("Failed to push event %d to miss queue: %s",
                          handler->id, ucs_status_string(status));
            }
        }
        return UCS_ERR_NO_PROGRESS;
    }
    return UCS_OK;
}
Example #12
static ucs_status_t ucp_perf_setup(ucx_perf_context_t *perf, ucx_perf_params_t *params)
{
    ucp_params_t ucp_params;
    ucp_config_t *config;
    ucs_status_t status;
    uint64_t features;

    status = ucp_perf_test_check_params(params, &features);
    if (status != UCS_OK) {
        goto err;
    }

    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        goto err;
    }

    ucp_params.features        = features;
    ucp_params.request_size    = 0;
    ucp_params.request_init    = NULL;
    ucp_params.request_cleanup = NULL;

    status = ucp_init(&ucp_params, config, &perf->ucp.context);
    ucp_config_release(config);
    if (status != UCS_OK) {
        goto err;
    }

    status = ucp_worker_create(perf->ucp.context, params->thread_mode,
                               &perf->ucp.worker);
    if (status != UCS_OK) {
        goto err_cleanup;
    }

    status = ucp_perf_test_alloc_mem(perf, params);
    if (status != UCS_OK) {
        ucs_warn("ucp test failed to alocate memory");
        goto err_destroy_worker;
    }

    status = ucp_perf_test_setup_endpoints(perf, features);
    if (status != UCS_OK) {
        if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("Failed to setup endpoints: %s", ucs_status_string(status));
        }
        goto err_free_mem;
    }

    return UCS_OK;

err_free_mem:
    ucp_perf_test_free_mem(perf);
err_destroy_worker:
    ucp_worker_destroy(perf->ucp.worker);
err_cleanup:
    ucp_cleanup(perf->ucp.context);
err:
    return status;
}
Example #13
File: ucp_ep.c Project: alex--m/ucx
ucs_status_t ucp_ep_create(ucp_worker_h worker, const ucp_address_t *address,
                           ucp_ep_h *ep_p)
{
    char peer_name[UCP_WORKER_NAME_MAX];
    uint8_t addr_indices[UCP_MAX_LANES];
    ucp_address_entry_t *address_list;
    unsigned address_count;
    ucs_status_t status;
    uint64_t dest_uuid;
    ucp_ep_h ep;

    UCS_ASYNC_BLOCK(&worker->async);

    status = ucp_address_unpack(address, &dest_uuid, peer_name, sizeof(peer_name),
                                &address_count, &address_list);
    if (status != UCS_OK) {
        ucs_error("failed to unpack remote address: %s", ucs_status_string(status));
        goto out;
    }

    ep = ucp_worker_ep_find(worker, dest_uuid);
    if (ep != NULL) {
        /* TODO handle a case where the existing endpoint is incomplete */
        *ep_p = ep;
        status = UCS_OK;
        goto out_free_address;
    }

    /* allocate endpoint */
    status = ucp_ep_new(worker, dest_uuid, peer_name, "from api call", &ep);
    if (status != UCS_OK) {
        goto out_free_address;
    }

    /* initialize transport endpoints */
    status = ucp_wireup_init_lanes(ep, address_count, address_list, addr_indices);
    if (status != UCS_OK) {
        goto err_destroy_ep;
    }

    /* send initial wireup message */
    if (!(ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED)) {
        status = ucp_wireup_send_request(ep);
        if (status != UCS_OK) {
            goto err_destroy_ep;
        }
    }

    *ep_p = ep;
    goto out_free_address;

err_destroy_ep:
    ucp_ep_destroy(ep);
out_free_address:
    ucs_free(address_list);
out:
    UCS_ASYNC_UNBLOCK(&worker->async);
    return status;
}
Example #14
int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs)
{
    int my_rank = oshmem_my_proc_id();
    size_t num_reqs, max_reqs;
    void *dreq, **dreqs;
    ucp_ep_h ep;
    size_t i, n;

    oshmem_shmem_barrier();

    if (!mca_spml_ucx.ucp_peers) {
        return OSHMEM_SUCCESS;
    }

    max_reqs = mca_spml_ucx.num_disconnect;
    if (max_reqs > nprocs) {
        max_reqs = nprocs;
    }

    dreqs = malloc(sizeof(*dreqs) * max_reqs);
    if (dreqs == NULL) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    num_reqs = 0;

    for (i = 0; i < nprocs; ++i) {
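        /* stagger the peer order by rank so all PEs do not disconnect from
         * the same peer at once */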
        n  = (i + my_rank) % nprocs;
        ep = mca_spml_ucx.ucp_peers[n].ucp_conn;
        if (ep == NULL) {
            continue;
        }

        SPML_VERBOSE(10, "disconnecting from peer %d", (int)n);
        dreq = ucp_disconnect_nb(ep);
        if (dreq != NULL) {
            if (UCS_PTR_IS_ERR(dreq)) {
                SPML_ERROR("ucp_disconnect_nb(%d) failed: %s", n,
                           ucs_status_string(UCS_PTR_STATUS(dreq)));
            } else {
                dreqs[num_reqs++] = dreq;
            }
        }

        mca_spml_ucx.ucp_peers[n].ucp_conn = NULL;

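        /* cap the number of outstanding disconnect requests */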
        if ((int)num_reqs >= mca_spml_ucx.num_disconnect) {
            mca_spml_ucx_waitall(dreqs, &num_reqs);
        }
    }

    mca_spml_ucx_waitall(dreqs, &num_reqs);
    free(dreqs);

    opal_pmix.fence(NULL, 0);
    free(mca_spml_ucx.ucp_peers);
    return OSHMEM_SUCCESS;
}
Example #15
static ucs_status_t ucp_add_tl_resources(ucp_context_h context,
                                         uct_pd_h pd, ucp_rsc_index_t pd_index,
                                         const ucp_config_t *config,
                                         unsigned *num_resources_p,
                                         uint64_t *masks)
{
    uct_tl_resource_desc_t *tl_resources;
    ucp_tl_resource_desc_t *tmp;
    unsigned num_resources;
    ucs_status_t status;
    ucp_rsc_index_t i;

    *num_resources_p = 0;

    /* check which uct resources are available */
    status = uct_pd_query_tl_resources(pd, &tl_resources, &num_resources);
    if (status != UCS_OK) {
        ucs_error("Failed to query resources: %s", ucs_status_string(status));
        goto err;
    }

    if (num_resources == 0) {
        ucs_debug("No tl resources found for pd %s", context->pd_rscs[pd_index].pd_name);
        goto out_free_resources;
    }

    tmp = ucs_realloc(context->tl_rscs,
                      sizeof(*context->tl_rscs) * (context->num_tls + num_resources),
                      "ucp resources");
    if (tmp == NULL) {
        ucs_error("Failed to allocate resources");
        status = UCS_ERR_NO_MEMORY;
        goto err_free_resources;
    }

    /* copy only the resources enabled by user configuration */
    context->tl_rscs = tmp;
    for (i = 0; i < num_resources; ++i) {
        if (ucp_is_resource_enabled(&tl_resources[i], config, masks)) {
            context->tl_rscs[context->num_tls].tl_rsc   = tl_resources[i];
            context->tl_rscs[context->num_tls].pd_index = pd_index;
            ++context->num_tls;
            ++(*num_resources_p);
        }
    }

out_free_resources:
    uct_release_tl_resource_list(tl_resources);
    return UCS_OK;

err_free_resources:
    uct_release_tl_resource_list(tl_resources);
err:
    return status;
}
Example #16
static UCS_F_ALWAYS_INLINE ucs_status_ptr_t
ucp_tag_send_req(ucp_request_t *req, size_t count,
                 const ucp_ep_msg_config_t* msg_config,
                 size_t rndv_rma_thresh, size_t rndv_am_thresh,
                 ucp_send_callback_t cb, const ucp_proto_t *proto)
{
    size_t seg_size     = (msg_config->max_bcopy - proto->only_hdr_size);
    size_t rndv_thresh  = ucp_tag_get_rndv_threshold(req, count,
                                                     msg_config->max_iov,
                                                     rndv_rma_thresh,
                                                     rndv_am_thresh, seg_size);
    size_t zcopy_thresh = ucp_proto_get_zcopy_threshold(req, msg_config, count,
                                                        rndv_thresh);
    ssize_t max_short   = ucp_proto_get_short_max(req, msg_config);
    ucs_status_t status;

    ucs_trace_req("select tag request(%p) progress algorithm datatype=%lx "
                  "buffer=%p length=%zu max_short=%zd rndv_thresh=%zu "
                  "zcopy_thresh=%zu",
                  req, req->send.datatype, req->send.buffer, req->send.length,
                  max_short, rndv_thresh, zcopy_thresh);

    status = ucp_request_send_start(req, max_short, zcopy_thresh, seg_size,
                                    rndv_thresh, proto);
    if (ucs_unlikely(status != UCS_OK)) {
        if (status == UCS_ERR_NO_PROGRESS) {
            ucs_assert(req->send.length >= rndv_thresh);
            /* RMA/AM rendezvous */
            status = ucp_tag_send_start_rndv(req);
        }
        if (status != UCS_OK) {
            return UCS_STATUS_PTR(status);
        }
    }

    ucp_request_send_tag_stat(req);

    /*
     * Start the request.
     * If it is completed immediately, release the request and return the status.
     * Otherwise, return the request.
     */
    status = ucp_request_send(req);
    if (req->flags & UCP_REQUEST_FLAG_COMPLETED) {
        ucs_trace_req("releasing send request %p, returning status %s", req,
                      ucs_status_string(status));
        ucp_request_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucp_request_set_callback(req, send.cb, cb);
    ucs_trace_req("returning send request %p", req);
    return req + 1;
}
Example #17
File: ucp_ep.c Project: bbenton/ucx
ucs_status_t ucp_ep_create(ucp_worker_h worker, const ucp_address_t *address,
                           ucp_ep_h *ep_p)
{
    char peer_name[UCP_WORKER_NAME_MAX];
    ucs_status_t status;
    uint64_t dest_uuid;
    unsigned address_count;
    ucp_address_entry_t *address_list;
    ucp_ep_h ep;

    UCS_ASYNC_BLOCK(&worker->async);

    status = ucp_address_unpack(address, &dest_uuid, peer_name, sizeof(peer_name),
                                &address_count, &address_list);
    if (status != UCS_OK) {
        ucs_error("failed to unpack remote address: %s", ucs_status_string(status));
        goto out;
    }

    ep = ucp_worker_ep_find(worker, dest_uuid);
    if (ep != NULL) {
        /* TODO handle a case where the existing endpoint is incomplete */
        ucs_debug("returning existing ep %p which is already connected to %"PRIx64,
                  ep, ep->dest_uuid);
        *ep_p = ep;
        status = UCS_OK;
        goto out_free_address;
    }

    status = ucp_ep_create_connected(worker, dest_uuid, peer_name, address_count,
                                     address_list, " from api call", &ep);
    if (status != UCS_OK) {
        goto out_free_address;
    }

    /* send initial wireup message */
    if (!(ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED)) {
        status = ucp_wireup_send_request(ep);
        if (status != UCS_OK) {
            goto err_destroy_ep;
        }
    }

    *ep_p = ep;
    goto out_free_address;

err_destroy_ep:
    ucp_ep_destroy(ep);
out_free_address:
    ucs_free(address_list);
out:
    UCS_ASYNC_UNBLOCK(&worker->async);
    return status;
}
Example #18
static ucs_status_t uct_perf_test_alloc_mem(ucx_perf_context_t *perf,
                                            ucx_perf_params_t *params)
{
    ucs_status_t status;

    /* TODO use params->alignment  */

    status = uct_iface_mem_alloc(perf->uct.iface, 
                                 params->message_size * params->thread_count, 0,
                                 "perftest", &perf->uct.send_mem);
    if (status != UCS_OK) {
        ucs_error("Failed allocate send buffer: %s", ucs_status_string(status));
        goto err;
    }

    ucs_assert(perf->uct.send_mem.md == perf->uct.md);
    perf->send_buffer = perf->uct.send_mem.address;

    status = uct_iface_mem_alloc(perf->uct.iface, 
                                 params->message_size * params->thread_count, 0,
                                 "perftest", &perf->uct.recv_mem);
    if (status != UCS_OK) {
        ucs_error("Failed allocate receive buffer: %s", ucs_status_string(status));
        goto err_free_send;
    }

    ucs_assert(perf->uct.recv_mem.md == perf->uct.md);
    perf->recv_buffer = perf->uct.recv_mem.address;

    perf->offset = 0;

    ucs_debug("allocated memory. Send buffer %p, Recv buffer %p",
              perf->send_buffer, perf->recv_buffer);
    return UCS_OK;

err_free_send:
    uct_iface_mem_free(&perf->uct.send_mem);
err:
    return status;
}
Example #19
int mca_spml_ucx_fence(shmem_ctx_t ctx)
{
    ucs_status_t err;
    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;

    err = ucp_worker_fence(ucx_ctx->ucp_worker);
    if (UCS_OK != err) {
        SPML_UCX_ERROR("fence failed: %s", ucs_status_string(err));
        oshmem_shmem_abort(-1);
        return OSHMEM_ERROR;
    }
    return OSHMEM_SUCCESS;
}
Example #20
static int ucx_perf_thread_spawn(ucx_perf_params_t* params, 
                                 ucx_perf_result_t* result) {
    ucx_perf_context_t perf;
    ucs_status_t status;
    int ti;
    int nti = params->thread_count;

    ucx_perf_thread_context_t* tctx =
        calloc(nti, sizeof(ucx_perf_thread_context_t));
    ucs_status_t* statuses =
        calloc(nti, sizeof(ucs_status_t));
    pthread_barrier_t tbarrier;

    /* bail out before touching the barrier if either allocation failed */
    if ((tctx == NULL) || (statuses == NULL)) {
        status = UCS_ERR_NO_MEMORY;
        goto out_free;
    }

    pthread_barrier_init(&tbarrier, NULL, nti);

    ucx_perf_test_reset(&perf, params);
    status = ucx_perf_funcs[params->api].setup(&perf, params);
    if (UCS_OK != status) {
        goto out_cleanup;
    }

    for (ti = 0; ti < nti; ti++) {
        tctx[ti].tid = ti;
        tctx[ti].ntid = nti;
        tctx[ti].tbarrier = &tbarrier;
        tctx[ti].statuses = statuses;
        tctx[ti].params = *params;
        tctx[ti].perf = perf;
        /* Doctor the src and dst buffers to make them thread specific */
        tctx[ti].perf.send_buffer += ti * params->message_size;
        tctx[ti].perf.recv_buffer += ti * params->message_size;
        pthread_create(&tctx[ti].pt, NULL, 
                       ucx_perf_thread_run_test, (void*)&tctx[ti]);
    }
    
    for (ti = 0; ti < nti; ti++) {
        pthread_join(tctx[ti].pt, NULL);
        if (UCS_OK != statuses[ti]) {
            ucs_error("Thread %d failed to run test: %s", tctx[ti].tid, ucs_status_string(statuses[ti]));
            status = statuses[ti];
        }
    }
    
    ucx_perf_funcs[params->api].cleanup(&perf);

out_cleanup:
    pthread_barrier_destroy(&tbarrier);
out_free:
    free(statuses);
    free(tctx);

    return status;
}
Example #21
File: ucp_ep.c Project: alex--m/ucx
static ucs_status_ptr_t ucp_disconnect_nb_internal(ucp_ep_h ep)
{
    ucs_status_t status;
    ucp_request_t *req;

    ucs_debug("disconnect ep %p", ep);

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
    }

    /*
     * A flush operation can be queued on the pending queue of only one lane
     * at a time (indicated by req->send.lane), but may be scheduled for
     * completion on any number of lanes. req->send.uct_comp.count keeps track
     * of how many lanes are not flushed yet; when it reaches zero, all lanes
     * are flushed. req->send.flush.lanes keeps track of which lanes we still
     * have to start a flush on.
     * If a flush is completed from a pending/completion callback, we need to
     * schedule a slow-path callback to release the endpoint later, since a UCT
     * endpoint cannot be released from pending/completion callback context.
     */
    req->flags                  = 0;
    req->status                 = UCS_OK;
    req->send.ep                = ep;
    req->send.flush.flushed_cb  = ucp_ep_disconnected;
    req->send.flush.lanes       = UCS_MASK(ucp_ep_num_lanes(ep));
    req->send.flush.cbq_elem.cb = ucp_ep_flushed_slow_path_callback;
    req->send.flush.cbq_elem_on = 0;
    req->send.lane              = UCP_NULL_LANE;
    req->send.uct.func          = ucp_ep_flush_progress_pending;
    req->send.uct_comp.func     = ucp_ep_flush_completion;
    req->send.uct_comp.count    = ucp_ep_num_lanes(ep);

    ucp_ep_flush_progress(req);

    if (req->send.uct_comp.count == 0) {
        status = req->status;
        ucp_ep_disconnected(req);
        ucs_trace_req("ep %p: releasing flush request %p, returning status %s",
                      ep, req, ucs_status_string(status));
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucs_trace_req("ep %p: return inprogress flush request %p (%p)", ep, req,
                  req + 1);
    return req + 1;
}
Example #22
static void
mca_pml_ucx_blocking_recv_completion(void *request, ucs_status_t status,
                                     ucp_tag_recv_info_t *info)
{
    ompi_request_t *req = request;

    PML_UCX_VERBOSE(8, "blocking receive request %p completed with status %s tag %"PRIx64" len %zu",
                    (void*)req, ucs_status_string(status), info->sender_tag,
                    info->length);

    mca_pml_ucx_set_recv_status(&req->req_status, status, info);
    PML_UCX_ASSERT(!(REQUEST_COMPLETE(req)));
    ompi_request_complete(req, true);
}
Example #23
static UCS_F_ALWAYS_INLINE void
ucp_tag_recv_request_completed(ucp_request_t *req, ucs_status_t status,
                               ucp_tag_recv_info_t *info, const char *function)
{
    ucs_trace_req("%s returning completed request %p (%p) stag 0x%"PRIx64" len %zu, %s",
                  function, req, req + 1, info->sender_tag, info->length,
                  ucs_status_string(status));

    req->status = status;
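    /* mark completed; if the user already released the request, return it to
     * the mpool */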
    if ((req->flags |= UCP_REQUEST_FLAG_COMPLETED) & UCP_REQUEST_FLAG_RELEASED) {
        ucp_request_put(req);
    }
    UCS_PROFILE_REQUEST_EVENT(req, "complete_recv", 0);
}
Example #24
static inline ucs_status_ptr_t
ucp_tag_send_req(ucp_request_t *req, size_t count, ssize_t max_short,
                 size_t zcopy_thresh, size_t rndv_thresh, ucp_send_callback_t cb,
                 const ucp_proto_t *proto)
{
    ucs_status_t status;

    switch (req->send.datatype & UCP_DATATYPE_CLASS_MASK) {
    case UCP_DATATYPE_CONTIG:
        status = ucp_tag_req_start_contig(req, count, max_short, zcopy_thresh,
                                          rndv_thresh, proto);
        if (status != UCS_OK) {
            return UCS_STATUS_PTR(status);
        }
        break;

    case UCP_DATATYPE_IOV:
        status = ucp_tag_req_start_iov(req, count, max_short, zcopy_thresh,
                                       rndv_thresh, proto);
        if (status != UCS_OK) {
            return UCS_STATUS_PTR(status);
        }
        break;

    case UCP_DATATYPE_GENERIC:
        ucp_tag_req_start_generic(req, count, rndv_thresh, proto);
        break;

    default:
        ucs_error("Invalid data type");
        return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM);
    }

    /*
     * Start the request.
     * If it is completed immediately, release the request and return the status.
     * Otherwise, return the request.
     */
    status = ucp_request_start_send(req);
    if (req->flags & UCP_REQUEST_FLAG_COMPLETED) {
        ucs_trace_req("releasing send request %p, returning status %s", req,
                      ucs_status_string(status));
        ucs_mpool_put(req);
        return UCS_STATUS_PTR(status);
    }

    ucs_trace_req("returning send request %p", req);
    req->send.cb = cb;
    return req + 1;
}
Example #25
/*
 * @param [in] rsc_tli  Resource index for every lane.
 */
static ucs_status_t ucp_wireup_msg_send(ucp_ep_h ep, uint8_t type,
                                        uint64_t tl_bitmap,
                                        const ucp_rsc_index_t *rsc_tli)
{
    ucp_rsc_index_t rsc_index;
    ucp_lane_index_t lane;
    unsigned order[UCP_MAX_LANES + 1];
    ucp_request_t* req;
    ucs_status_t status;
    void *address;

    ucs_assert(ep->cfg_index != (uint8_t)-1);

    req = ucs_mpool_get(&ep->worker->req_mp);
    if (req == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    req->flags                   = UCP_REQUEST_FLAG_RELEASED;
    req->send.ep                 = ep;
    req->send.cb                 = ucp_wireup_msg_send_completion;
    req->send.wireup.type        = type;
    req->send.uct.func           = ucp_wireup_msg_progress;

    /* pack all addresses */
    status = ucp_address_pack(ep->worker, ep, tl_bitmap, order,
                              &req->send.length, &address);
    if (status != UCS_OK) {
        ucs_error("failed to pack address: %s", ucs_status_string(status));
        ucs_mpool_put(req); /* the request was never queued, return it */
        return status;
    }

    req->send.buffer = address;

    /* for each lane, send the index of the address that the remote side
     * should connect to */
    for (lane = 0; lane < UCP_MAX_LANES; ++lane) {
        rsc_index = rsc_tli[lane];
        if (rsc_index != UCP_NULL_RESOURCE) {
            req->send.wireup.tli[lane] = ucp_wireup_address_index(order,
                                                                  tl_bitmap,
                                                                  rsc_index);
        } else {
            req->send.wireup.tli[lane] = -1;
        }
    }

    ucp_request_start_send(req);
    return UCS_OK;
}
Example #26
/**
 * dispatch requests waiting for tx resources
 */
ucs_arbiter_cb_result_t
uct_dc_mlx5_iface_dci_do_pending_tx(ucs_arbiter_t *arbiter,
                                    ucs_arbiter_elem_t *elem,
                                    void *arg)
{
    uct_dc_mlx5_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_dc_mlx5_ep_t, arb_group);
    uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_dc_mlx5_iface_t);
    uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv);
    ucs_status_t status;

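    /* no iface-wide TX resources: stop dispatching the arbiter entirely */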
    if (!uct_rc_iface_has_tx_resources(&iface->super.super)) {
        return UCS_ARBITER_CB_RESULT_STOP;
    }

    status = req->func(req);
    ucs_trace_data("progress pending request %p returned: %s", req,
                   ucs_status_string(status));
    if (status == UCS_OK) {
        /* For dcs* policies, release the DCI if this is the last element in
         * the group and the DCI has no outstanding operations, e.g. the
         * pending callback did not send anything (uct_ep_flush, or simply
         * returned OK).
         */
        if (ucs_arbiter_elem_is_last(&ep->arb_group, elem)) {
            uct_dc_mlx5_iface_dci_free(iface, ep);
        }
        return UCS_ARBITER_CB_RESULT_REMOVE_ELEM;
    }
    if (status == UCS_INPROGRESS) {
        return UCS_ARBITER_CB_RESULT_NEXT_GROUP;
    }

    if (!uct_dc_mlx5_iface_dci_ep_can_send(ep)) {
        /* Deschedule the group even if FC is the only missing resource; it
         * will be scheduled again when credits arrive. With the rand policy
         * we can't deschedule the group when non-FC resources are missing,
         * since it would never be scheduled again. */
        if (uct_dc_mlx5_iface_is_dci_rand(iface) &&
            uct_rc_fc_has_resources(&iface->super.super, &ep->fc)) {
            return UCS_ARBITER_CB_RESULT_RESCHED_GROUP;
        } else {
            return UCS_ARBITER_CB_RESULT_DESCHED_GROUP;
        }
    }

    ucs_assertv(!uct_rc_iface_has_tx_resources(&iface->super.super),
                "pending callback returned error but send resources are available");
    return UCS_ARBITER_CB_RESULT_STOP;
}
Example #27
static int ucx_perf_thread_spawn(ucx_perf_params_t* params,
                                 ucx_perf_result_t* result) {
    ucx_perf_context_t perf;
    ucs_status_t status = UCS_OK;
    int ti, nti;

    omp_set_num_threads(params->thread_count);
    nti = params->thread_count;

    ucx_perf_thread_context_t* tctx =
        calloc(nti, sizeof(ucx_perf_thread_context_t));
    ucs_status_t* statuses =
        calloc(nti, sizeof(ucs_status_t));
    /* free(NULL) is safe, so one cleanup path handles partial failure */
    if ((tctx == NULL) || (statuses == NULL)) {
        status = UCS_ERR_NO_MEMORY;
        goto out_cleanup;
    }

    ucx_perf_test_reset(&perf, params);
    status = ucx_perf_funcs[params->api].setup(&perf, params);
    if (UCS_OK != status) {
        goto out_cleanup;
    }

#pragma omp parallel private(ti)
{
    ti = omp_get_thread_num();
    tctx[ti].tid = ti;
    tctx[ti].ntid = nti;
    tctx[ti].statuses = statuses;
    tctx[ti].params = *params;
    tctx[ti].perf = perf;
    /* Doctor the src and dst buffers to make them thread specific */
    tctx[ti].perf.send_buffer += ti * params->message_size;
    tctx[ti].perf.recv_buffer += ti * params->message_size;
    tctx[ti].perf.offset = ti * params->message_size;
    ucx_perf_thread_run_test((void*)&tctx[ti]);
}
    for (ti = 0; ti < nti; ti++) {
        if (UCS_OK != statuses[ti]) {
            ucs_error("Thread %d failed to run test: %s", tctx[ti].tid, ucs_status_string(statuses[ti]));
            status = statuses[ti];
        }
    }

    ucx_perf_funcs[params->api].cleanup(&perf);

out_cleanup:
    free(statuses);
    free(tctx);

    return status;
}
Example #28
static UCS_F_ALWAYS_INLINE void
ucp_tag_recv_request_completed(ucp_request_t *req, ucs_status_t status,
                               ucp_tag_recv_info_t *info, const char *function)
{
    ucs_trace_req("%s returning completed request %p (%p) stag 0x%"PRIx64" len %zu, %s",
                  function, req, req + 1, info->sender_tag, info->length,
                  ucs_status_string(status));

    req->status = status;
    req->flags |= UCP_REQUEST_FLAG_COMPLETED;
    if (req->flags & UCP_REQUEST_FLAG_BLOCK_OFFLOAD) {
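        /* one fewer software request blocking tag-matching offload */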
        --req->recv.worker->context->tm.sw_req_count;
    }
    UCS_PROFILE_REQUEST_EVENT(req, "complete_recv", 0);
}
Example #29
static int ucx_perf_thread_spawn(ucx_perf_context_t *perf,
                                 ucx_perf_result_t* result)
{
    ucx_perf_thread_context_t* tctx;
    ucs_status_t* statuses;
    size_t message_size;
    ucs_status_t status;
    int ti, nti;

    message_size = ucx_perf_get_message_size(&perf->params);
    omp_set_num_threads(perf->params.thread_count);
    nti = perf->params.thread_count;

    tctx     = calloc(nti, sizeof(ucx_perf_thread_context_t));
    statuses = calloc(nti, sizeof(ucs_status_t));
    if ((tctx == NULL) || (statuses == NULL)) {
        status = UCS_ERR_NO_MEMORY;
        goto out_free;
    }

#pragma omp parallel private(ti)
{
    ti = omp_get_thread_num();
    tctx[ti].tid = ti;
    tctx[ti].ntid = nti;
    tctx[ti].statuses = statuses;
    tctx[ti].perf = *perf;
    /* Doctor the src and dst buffers to make them thread specific */
    tctx[ti].perf.send_buffer += ti * message_size;
    tctx[ti].perf.recv_buffer += ti * message_size;
    tctx[ti].perf.offset = ti * message_size;
    ucx_perf_thread_run_test((void*)&tctx[ti]);
}

    status = UCS_OK;
    for (ti = 0; ti < nti; ti++) {
        if (UCS_OK != statuses[ti]) {
            ucs_error("Thread %d failed to run test: %s", tctx[ti].tid,
                      ucs_status_string(statuses[ti]));
            status = statuses[ti];
        }
    }

out_free:
    free(statuses);
    free(tctx);
    return status;
}
Example #30
static ucs_status_t ucp_ep_send_wireup_am(ucp_ep_h ep, uct_ep_h uct_ep,
                                          uint8_t am_id, ucp_rsc_index_t dst_rsc_index)
{
    ucp_rsc_index_t rsc_index    = ep->uct.rsc_index;
    ucp_worker_h worker          = ep->worker;
    ucp_context_h context        = worker->context;
    uct_iface_attr_t *iface_attr = &worker->iface_attrs[rsc_index];
    ucp_wireup_msg_t *msg;
    ucs_status_t status;
    size_t msg_len;

    msg_len = sizeof(*msg) + iface_attr->ep_addr_len;

    /* TODO use custom pack callback to avoid this allocation and memcopy */
    msg = ucs_malloc(msg_len, "conn_req");
    if (msg == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    msg->src_uuid      = worker->uuid;
    msg->src_pd_index  = context->tl_rscs[rsc_index].pd_index;
    msg->src_rsc_index = rsc_index;
    msg->dst_rsc_index = dst_rsc_index;

    status = uct_ep_get_address(ep->uct.next_ep, (struct sockaddr *)(msg + 1));
    if (status != UCS_OK) {
        goto err_free;
    }

    status = uct_ep_am_bcopy(uct_ep, am_id, (uct_pack_callback_t)memcpy,
                             msg, msg_len);
    if (status != UCS_OK) {
        ucs_debug("failed to send conn msg: %s%s", ucs_status_string(status),
                  (status == UCS_ERR_NO_RESOURCE) ? ", will retry" : "");
        goto err_free;
    }

    ucp_wireup_log(worker, am_id, msg, 1);
    ucs_free(msg);
    return UCS_OK;

err_free:
    ucs_free(msg);
err:
    return status;
}