예제 #1
0
파일: sys.c 프로젝트: ornl-languages/ucx
/**
 * Find the lowest-numbered CPU present in the calling process's affinity mask.
 *
 * @return Index of the first CPU that is set in the affinity mask. If no CPU
 *         below the configured CPU count is set, the configured CPU count is
 *         returned. If sysconf() or sched_getaffinity() fails, their negative
 *         return value is passed through.
 */
int ucs_get_first_cpu()
{
    cpu_set_t affinity;
    int num_cpus, rc, cpu;

    num_cpus = sysconf(_SC_NPROCESSORS_CONF);
    if (num_cpus < 0) {
        ucs_error("failed to get local cpu count: %m");
        return num_cpus;
    }

    CPU_ZERO(&affinity);
    rc = sched_getaffinity(0, sizeof(affinity), &affinity);
    if (rc < 0) {
        ucs_error("failed to get process affinity: %m");
        return rc;
    }

    /* Advance until a CPU belonging to the mask is found */
    cpu = 0;
    while ((cpu < num_cpus) && !CPU_ISSET(cpu, &affinity)) {
        ++cpu;
    }

    /* cpu equals num_cpus here when no bit below the CPU count was set */
    return cpu;
}
예제 #2
0
파일: mm_posix.c 프로젝트: igor-ivanov/ucx
/**
 * Create a new POSIX shared memory object and resize it to the given length.
 *
 * @param file_name  Name passed to shm_open().
 * @param length     Size to give the object via ftruncate().
 * @param shm_fd     Receives the descriptor returned by shm_open() (-1 if the
 *                   open itself failed).
 *
 * @return UCS_OK on success, UCS_ERR_SHMEM_SEGMENT if shm_open() or
 *         ftruncate() failed. When ftruncate() fails the descriptor is closed
 *         and the object is unlinked before returning.
 */
static ucs_status_t uct_posix_shm_open(const char *file_name, size_t length, int *shm_fd)
{
    int fd;

    /* O_CREAT | O_EXCL: the object must not already exist */
    fd      = shm_open(file_name, O_CREAT | O_RDWR | O_EXCL,
                       UCT_MM_POSIX_SHM_OPEN_MODE);
    *shm_fd = fd;
    if (fd == -1) {
        ucs_error("Error returned from shm_open %m. File name is: %s",
                  file_name);
        return UCS_ERR_SHMEM_SEGMENT;
    }

    if (ftruncate(fd, length) != -1) {
        return UCS_OK;
    }

    /* Resizing failed: undo the creation */
    ucs_error("Error returned from ftruncate %m");
    close(fd);
    if (shm_unlink(file_name) != 0) {
        ucs_warn("unable to shm_unlink the shared memory segment");
    }
    return UCS_ERR_SHMEM_SEGMENT;
}
예제 #3
0
파일: libperf.c 프로젝트: xinzhao3/ucx
/**
 * Validate generic performance-test parameters.
 *
 * Checks that the message size and max_outstanding are at least 1 and, when
 * an IOV stride is set, that every entry of msg_size_list fits in the stride.
 * Error messages are printed only when UCX_PERF_TEST_FLAG_VERBOSE is set.
 *
 * @param params  Test parameters to validate.
 *
 * @return UCS_OK if all checks pass, UCS_ERR_INVALID_PARAM otherwise.
 */
static ucs_status_t ucx_perf_test_check_params(ucx_perf_params_t *params)
{
    size_t idx;

    if (ucx_perf_get_message_size(params) < 1) {
        if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("Message size too small, need to be at least 1");
        }
        return UCS_ERR_INVALID_PARAM;
    }

    if (params->max_outstanding < 1) {
        if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("max_outstanding, need to be at least 1");
        }
        return UCS_ERR_INVALID_PARAM;
    }

    /* A zero stride means there is nothing to compare the sizes against */
    if (!params->iov_stride) {
        return UCS_OK;
    }

    /* Every individual message size must fit into the stride */
    for (idx = 0; idx < params->msg_size_cnt; idx++) {
        if (params->msg_size_list[idx] <= params->iov_stride) {
            continue;
        }

        if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("Buffer size %lu bigger than stride %lu",
                      params->msg_size_list[idx], params->iov_stride);
        }
        return UCS_ERR_INVALID_PARAM;
    }

    return UCS_OK;
}
예제 #4
0
파일: mm_posix.c 프로젝트: igor-ivanov/ucx
/**
 * Create a new regular file to back a shared segment and resize it.
 *
 * @param file_name  Path passed to open().
 * @param length     Size to give the file via ftruncate().
 * @param shm_fd     Receives the descriptor returned by open() (-1 if the
 *                   open itself failed).
 *
 * @return UCS_OK on success, UCS_ERR_SHMEM_SEGMENT if open() or ftruncate()
 *         failed. When ftruncate() fails the descriptor is closed and the
 *         file is unlinked before returning.
 */
static ucs_status_t uct_posix_open(const char *file_name, size_t length, int *shm_fd)
{
    int fd;

    /* O_CREAT | O_EXCL: the path must not already exist */
    fd      = open(file_name, O_CREAT | O_RDWR | O_EXCL, UCT_MM_POSIX_SHM_OPEN_MODE);
    *shm_fd = fd;
    if (fd == -1) {
        ucs_error("Error returned from open %m . File name is: %s", file_name);
        return UCS_ERR_SHMEM_SEGMENT;
    }

    if (ftruncate(fd, length) != -1) {
        return UCS_OK;
    }

    /* Resizing failed: undo the creation */
    ucs_error("Error returned from ftruncate %m");
    close(fd);
    if (unlink(file_name) != 0) {
        ucs_warn("unable to unlink the shared memory segment");
    }
    return UCS_ERR_SHMEM_SEGMENT;
}
예제 #5
0
/**
 * Activate a uGNI interface: initialize the NIC and create the local CQ.
 *
 * Does nothing if the interface is already marked as activated.
 *
 * @param iface  Interface to activate.
 *
 * @return UCS_OK on success or if already activated; the status returned by
 *         uct_ugni_init_nic() if NIC initialization fails; UCS_ERR_NO_DEVICE
 *         if GNI_CqCreate() fails.
 */
ucs_status_t ugni_activate_iface(uct_ugni_iface_t *iface)
{
    uint32_t pe_addr;
    gni_return_t gni_rc;
    ucs_status_t rc;

    if (iface->activated) {
        return UCS_OK;
    }

    rc = uct_ugni_init_nic(0, &iface->domain_id, &iface->cdm_handle,
                           &iface->nic_handle, &pe_addr);
    if (rc != UCS_OK) {
        ucs_error("Failed to UGNI NIC, Error status: %d", rc);
        return rc;
    }

    ucs_debug("Made ugni interface. iface->dev->nic_addr = %i iface->domain_id = %i",
              iface->dev->address, iface->domain_id);

    gni_rc = GNI_CqCreate(iface->nic_handle, UCT_UGNI_LOCAL_CQ, 0,
                          GNI_CQ_NOBLOCK, NULL, NULL, &iface->local_cq);
    if (gni_rc == GNI_RC_SUCCESS) {
        /* NIC and local CQ are ready */
        iface->activated = true;
        return UCS_OK;
    }

    ucs_error("GNI_CqCreate failed, Error status: %s %d",
              gni_err_str[gni_rc], gni_rc);
    return UCS_ERR_NO_DEVICE;
}
예제 #6
0
파일: async.c 프로젝트: alex-mikheev/ucx
/**
 * Insert a handler into the global async handler hash, keyed by handler->id.
 *
 * The global handlers write lock is held for the duration of the call.
 *
 * @param handler  Handler to add; its refcount must be 1.
 *
 * @return UCS_OK when inserted, UCS_ERR_NO_MEMORY if the hash insertion
 *         reported a failure (-1), UCS_ERR_ALREADY_EXISTS if the id is
 *         already present in the hash.
 */
static ucs_status_t ucs_async_handler_add(ucs_async_handler_t *handler)
{
    ucs_status_t result;
    khiter_t iter;
    int put_ret;

    pthread_rwlock_wrlock(&ucs_async_global_context.handlers_lock);

    ucs_assert_always(handler->refcount == 1);
    iter = kh_put(ucs_async_handler, &ucs_async_global_context.handlers,
                  handler->id, &put_ret);

    switch (put_ret) {
    case -1:
        ucs_error("Failed to add async handler " UCS_ASYNC_HANDLER_FMT " to hash",
                  UCS_ASYNC_HANDLER_ARG(handler));
        result = UCS_ERR_NO_MEMORY;
        break;
    case 0:
        /* Key was already present: report the handler that owns it */
        ucs_error("Async handler " UCS_ASYNC_HANDLER_FMT " exists - cannot add %s()",
                  UCS_ASYNC_HANDLER_ARG(kh_value(&ucs_async_global_context.handlers, iter)),
                  ucs_debug_get_symbol_name(handler->cb));
        result = UCS_ERR_ALREADY_EXISTS;
        break;
    default:
        ucs_assert_always(!ucs_async_handler_kh_is_end(iter));
        kh_value(&ucs_async_global_context.handlers, iter) = handler;
        ucs_debug("added async handler " UCS_ASYNC_HANDLER_FMT " to hash",
                  UCS_ASYNC_HANDLER_ARG(handler));
        result = UCS_OK;
        break;
    }

    pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock);
    return result;
}
예제 #7
0
파일: ugni_iface.c 프로젝트: hppritcha/ucx
/**
 * Activate a uGNI interface: initialize the NIC and create the local CQ.
 *
 * Does nothing if the interface is already marked as activated. The PE
 * address reported by NIC initialization is stored in iface->pe_address.
 *
 * @param iface  Interface to activate.
 *
 * @return UCS_OK on success or if already activated; the value returned by
 *         uct_ugni_init_nic() if NIC initialization fails; UCS_ERR_NO_DEVICE
 *         if GNI_CqCreate() fails.
 */
ucs_status_t ugni_activate_iface(uct_ugni_iface_t *iface)
{
    gni_return_t gni_rc;
    int nic_rc;

    if (iface->activated) {
        return UCS_OK;
    }

    nic_rc = uct_ugni_init_nic(0, &iface->domain_id, &iface->cdm_handle,
                               &iface->nic_handle, &iface->pe_address);
    if (nic_rc != UCS_OK) {
        ucs_error("Failed to UGNI NIC, Error status: %d", nic_rc);
        return nic_rc;
    }

    gni_rc = GNI_CqCreate(iface->nic_handle, UCT_UGNI_LOCAL_CQ, 0,
                          GNI_CQ_NOBLOCK, NULL, NULL, &iface->local_cq);
    if (gni_rc == GNI_RC_SUCCESS) {
        /* NIC and local CQ are ready */
        iface->activated = true;
        return UCS_OK;
    }

    ucs_error("GNI_CqCreate failed, Error status: %s %d",
              gni_err_str[gni_rc], gni_rc);
    return UCS_ERR_NO_DEVICE;
}
예제 #8
0
/**
 * Class initializer for the loop-back ("self") transport interface.
 *
 * Verifies that the requested device is UCT_SELF_NAME, initializes the base
 * interface, creates the message-descriptor memory pool and takes the first
 * descriptor from it.
 *
 * @param md         Memory domain handed to the base class initializer.
 * @param worker     Worker handed to the base class initializer.
 * @param params     Interface parameters (dev_name, rx_headroom, stats_root
 *                   are read here).
 * @param tl_config  Transport configuration; treated as
 *                   uct_self_iface_config_t.
 *
 * @return UCS_OK on success; UCS_ERR_NO_DEVICE if the device name does not
 *         match; the status of uct_iface_mpool_init() if pool creation fails;
 *         UCS_ERR_NO_RESOURCE if no descriptor could be taken from the pool
 *         (the pool is destroyed in that case).
 */
static UCS_CLASS_INIT_FUNC(uct_self_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    ucs_status_t status;
    uct_self_iface_config_t *self_config = NULL;

    ucs_trace_func("Creating a loop-back transport self=%p rxh=%lu",
                   self, params->rx_headroom);

    if (strcmp(params->dev_name, UCT_SELF_NAME) != 0) {
        ucs_error("No device was found: %s", params->dev_name);
        return UCS_ERR_NO_DEVICE;
    }

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_self_iface_ops, md, worker,
                              tl_config UCS_STATS_ARG(params->stats_root)
                              UCS_STATS_ARG(UCT_SELF_NAME));

    self_config = ucs_derived_of(tl_config, uct_self_iface_config_t);

    self->id              = ucs_generate_uuid((uintptr_t)self);
    self->rx_headroom     = params->rx_headroom;
    self->data_length     = self_config->super.max_bcopy;
    self->release_desc.cb = uct_self_iface_release_desc;

    /* create a memory pool for data transferred */
    status = uct_iface_mpool_init(&self->super,
                                  &self->msg_desc_mp,
                                  sizeof(uct_recv_desc_t) + self->rx_headroom +
                                                            self->data_length,
                                  sizeof(uct_recv_desc_t) + self->rx_headroom,
                                  UCS_SYS_CACHE_LINE_SIZE,
                                  &self_config->mp,
                                  256,
                                  ucs_empty_function,
                                  "self_msg_desc");
    if (UCS_OK != status) {
        ucs_error("Failed to create a memory pool for the loop-back transport");
        goto err;
    }

    /* set the message descriptor for the loop-back */
    self->msg_cur_desc = ucs_mpool_get(&self->msg_desc_mp);
    if (NULL == self->msg_cur_desc) {
        ucs_error("Failed to get the first descriptor in loop-back MP storage");
        status = UCS_ERR_NO_RESOURCE;
        goto destroy_mpool;
    }

    /* Only annotate the descriptor for Valgrind once it is known to be valid */
    VALGRIND_MAKE_MEM_DEFINED(self->msg_cur_desc, sizeof(*(self->msg_cur_desc)));

    ucs_debug("Created a loop-back iface. id=0x%lx, desc=%p, len=%u, tx_hdr=%lu",
              self->id, self->msg_cur_desc, self->data_length, self->rx_headroom);
    return UCS_OK;

destroy_mpool:
    ucs_mpool_cleanup(&self->msg_desc_mp, 1);
err:
    return status;
}
예제 #9
0
/**
 * Complete a posted datagram identified by @a id and classify it.
 *
 * For UCT_UGNI_UDT_ANY the interface's wildcard endpoint and descriptor are
 * used; otherwise the endpoint is looked up by id and its posted descriptor
 * is used.
 *
 * @param iface   Interface the datagram was posted on.
 * @param id      Datagram id, or UCT_UGNI_UDT_ANY for the wildcard post.
 * @param ep_out  Receives the endpoint associated with the id (NULL for the
 *                wildcard). Written before the wait is issued.
 *
 * @return UCS_INPROGRESS if the datagram carries a payload
 *         (UCT_UGNI_UDT_PAYLOAD); UCS_OK if it is any other type, in which
 *         case the endpoint's posted descriptor has been released;
 *         UCS_ERR_CANCELED if the post was terminated; UCS_ERR_IO_ERROR if
 *         the wait failed or returned an unexpected post state.
 */
static ucs_status_t recieve_datagram(uct_ugni_udt_iface_t *iface, uint64_t id, uct_ugni_udt_ep_t **ep_out)
{
    uint32_t rem_addr, rem_id;
    gni_post_state_t post_state;
    gni_return_t ugni_rc;
    uct_ugni_udt_ep_t *ep;
    gni_ep_handle_t gni_ep;
    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *header;

    ucs_trace_func("iface=%p, id=%lx", iface, id);

    if (UCT_UGNI_UDT_ANY == id) {
        ep = NULL;
        gni_ep = iface->ep_any;
        desc = iface->desc_any;
    } else {
        /* NOTE(review): the lookup result is dereferenced without a NULL
         * check - presumably the id always maps to a live endpoint; confirm */
        ep = ucs_derived_of(uct_ugni_iface_lookup_ep(&iface->super, id),
                            uct_ugni_udt_ep_t);
        gni_ep = ep->super.ep;
        desc = ep->posted_desc;
    }

    *ep_out = ep;
    /* The GNI call is made while holding the device lock */
    uct_ugni_device_lock(&iface->super.cdm);
    ugni_rc = GNI_EpPostDataWaitById(gni_ep, id, -1, &post_state, &rem_addr, &rem_id);
    uct_ugni_device_unlock(&iface->super.cdm);
    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        ucs_error("GNI_EpPostDataWaitById, id=%lu Error status: %s %d",
                  id, gni_err_str[ugni_rc], ugni_rc);
        return UCS_ERR_IO_ERROR;
    }
    if (GNI_POST_TERMINATED == post_state) {
        return UCS_ERR_CANCELED;
    }

    if (GNI_POST_COMPLETED != post_state) {
        ucs_error("GNI_EpPostDataWaitById gave unexpected response: %u", post_state);
        return UCS_ERR_IO_ERROR;
    }

    /* Only non-wildcard datagrams are counted as outstanding */
    if (UCT_UGNI_UDT_ANY != id) {
        --iface->super.outstanding;
    }

    header = uct_ugni_udt_get_rheader(desc, iface);

    ucs_trace("Got datagram id: %lu type: %i len: %i am_id: %i", id, header->type, header->length, header->am_id);

    if (UCT_UGNI_UDT_PAYLOAD != header->type) {
        /* ack message, no data */
        /* A non-payload datagram is only expected on a specific endpoint */
        ucs_assert_always(NULL != ep);
        ucs_mpool_put(ep->posted_desc);
        uct_ugni_check_flush(ep->desc_flush_group);
        ep->posted_desc = NULL;
        return UCS_OK;
    }

    /* Payload present: the caller still has to process it */
    return UCS_INPROGRESS;
}
예제 #10
0
파일: ugni_md.c 프로젝트: henrypan/ucx
/**
 * Unpack a uGNI remote key from a packed buffer of three 64-bit words.
 *
 * Word 0 must equal UCT_UGNI_RKEY_MAGIC; words 1 and 2 are copied into a
 * newly allocated gni_mem_handle_t.
 *
 * @param mdc          Memory domain component (not used here).
 * @param rkey_buffer  Packed key: { magic, qword1, qword2 }.
 * @param rkey_p       Receives the allocated handle, cast to an integer.
 * @param handle_p     Set to NULL.
 *
 * @return UCS_OK on success, UCS_ERR_UNSUPPORTED if the magic does not match,
 *         UCS_ERR_NO_MEMORY if the handle could not be allocated.
 */
static ucs_status_t uct_ugni_rkey_unpack(uct_md_component_t *mdc, const void *rkey_buffer,
                                         uct_rkey_t *rkey_p, void **handle_p)
{
    const uint64_t *words = rkey_buffer;
    gni_mem_handle_t *handle;
    uint64_t magic;

    ucs_debug("Unpacking [ %"PRIx64" %"PRIx64" %"PRIx64"]", words[0], words[1], words[2]);

    magic = words[0];
    if (magic != UCT_UGNI_RKEY_MAGIC) {
        ucs_error("Failed to identify key. Expected %llx but received %"PRIx64"",
                  UCT_UGNI_RKEY_MAGIC, magic);
        return UCS_ERR_UNSUPPORTED;
    }

    handle = ucs_malloc(sizeof(gni_mem_handle_t), "gni_mem_handle_t");
    if (handle == NULL) {
        ucs_error("Failed to allocate memory for gni_mem_handle_t");
        return UCS_ERR_NO_MEMORY;
    }

    handle->qword1 = words[1];
    handle->qword2 = words[2];

    *rkey_p   = (uintptr_t)handle;
    *handle_p = NULL;
    return UCS_OK;
}
예제 #11
0
파일: mm_xpmem.c 프로젝트: henrypan/ucx
/**
 * Detach a remote XPMEM segment and release its access permit.
 *
 * The segment address is aligned down to the page size before being passed
 * to xpmem_detach(); the apid is taken from mm_desc->cookie.
 *
 * @param mm_desc  Remote segment descriptor (address, length, cookie).
 *
 * @return UCS_OK on success, UCS_ERR_IO_ERROR if xpmem_detach() or
 *         xpmem_release() fails.
 */
static ucs_status_t uct_xpmem_detach(uct_mm_remote_seg_t *mm_desc)
{
    xpmem_apid_t seg_apid = mm_desc->cookie;
    void *page_start      = ucs_align_down_pow2_ptr(mm_desc->address,
                                                    ucs_get_page_size());

    ucs_trace("xpmem detaching address %p", page_start);
    if (xpmem_detach(page_start) < 0) {
        ucs_error("Failed to xpmem_detach: %m");
        return UCS_ERR_IO_ERROR;
    }

    VALGRIND_MAKE_MEM_UNDEFINED(mm_desc->address, mm_desc->length);

    ucs_trace("xpmem releasing segment apid 0x%llx", seg_apid);
    if (xpmem_release(seg_apid) < 0) {
        ucs_error("Failed to release xpmem segment apid 0x%llx", seg_apid);
        return UCS_ERR_IO_ERROR;
    }

    return UCS_OK;
}
예제 #12
0
/**
 * Tear down the interface's wildcard datagram endpoint (iface->ep_any).
 *
 * With the device lock held: cancels the wildcard post, tests its state,
 * waits on it if it is still pending, and destroys the endpoint. On a cancel
 * or test failure (other than GNI_RC_NO_MATCH) the lock is released, the
 * error is logged and the endpoint is left undestroyed.
 *
 * @param iface  Interface whose wildcard endpoint is cleaned up.
 */
static void uct_ugni_udt_clean_wildcard(uct_ugni_udt_iface_t *iface)
{
    gni_return_t ugni_rc;
    uint32_t rem_addr, rem_id;
    gni_post_state_t post_state;
    uct_ugni_device_lock(&iface->super.cdm);
    ugni_rc = GNI_EpPostDataCancelById(iface->ep_any, UCT_UGNI_UDT_ANY);
    if (GNI_RC_SUCCESS != ugni_rc) {
        uct_ugni_device_unlock(&iface->super.cdm);
        ucs_error("GNI_EpPostDataCancel failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        return;
    }
    ugni_rc = GNI_EpPostDataTestById(iface->ep_any, UCT_UGNI_UDT_ANY, &post_state, &rem_addr, &rem_id);
    if (GNI_RC_SUCCESS != ugni_rc) {
        /* GNI_RC_NO_MATCH is tolerated; any other failure aborts the cleanup */
        if (GNI_RC_NO_MATCH != ugni_rc) {
            uct_ugni_device_unlock(&iface->super.cdm);
            ucs_error("GNI_EpPostDataTestById failed, Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
            return;
        }
    } else {
        if (GNI_POST_PENDING == post_state) {
            /* NOTE(review): the result of this wait is overwritten by the
             * GNI_EpDestroy() call below without being checked */
            ugni_rc = GNI_EpPostDataWaitById(iface->ep_any, UCT_UGNI_UDT_ANY, -1, &post_state, &rem_addr, &rem_id);
        }
    }
    ugni_rc = GNI_EpDestroy(iface->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_EpDestroy failed, Error status: %s %d\n",
                  gni_err_str[ugni_rc], ugni_rc);
    }
    uct_ugni_device_unlock(&iface->super.cdm);
}
예제 #13
0
/**
 * Register the memory that immediately follows a mailbox header with the NIC.
 *
 * The registered region starts at (mbox + 1) and spans
 * iface->bytes_per_mbox bytes. The GNI call is made under the CDM lock.
 *
 * @param iface  SMSG interface providing the NIC handle, remote CQ and size.
 * @param mbox   Mailbox whose gni_mem and base_address fields are filled in.
 *
 * @return UCS_OK on success, UCS_ERR_INVALID_PARAM if bytes_per_mbox is 0,
 *         UCS_ERR_IO_ERROR if GNI_MemRegister() fails.
 */
static ucs_status_t uct_ugni_smsg_mbox_reg(uct_ugni_smsg_iface_t *iface, uct_ugni_smsg_mbox_t *mbox)
{
    void *payload = (mbox + 1);
    gni_return_t rc;

    if (iface->bytes_per_mbox == 0) {
        ucs_error("Unexpected length %zu", iface->bytes_per_mbox);
        return UCS_ERR_INVALID_PARAM;
    }

    uct_ugni_cdm_lock(&iface->super.cdm);
    rc = GNI_MemRegister(uct_ugni_iface_nic_handle(&iface->super),
                         (uint64_t)payload, iface->bytes_per_mbox,
                         iface->remote_cq, GNI_MEM_READWRITE, -1,
                         &(mbox->gni_mem));
    uct_ugni_cdm_unlock(&iface->super.cdm);

    if (rc == GNI_RC_SUCCESS) {
        mbox->base_address = (uintptr_t)payload;
        return UCS_OK;
    }

    ucs_error("GNI_MemRegister failed (addr %p, size %zu), Error status: %s %d",
              payload, iface->bytes_per_mbox, gni_err_str[rc], rc);
    return UCS_ERR_IO_ERROR;
}
예제 #14
0
파일: libperf.c 프로젝트: xinzhao3/ucx
/**
 * Run a performance test described by @a params.
 *
 * Validates the command and API selection, allocates a test context, calls
 * the API-specific setup, runs the test (with an optional warm-up pass in
 * single-thread mode, or via ucx_perf_thread_spawn() otherwise), and finally
 * calls the API-specific cleanup and frees the context.
 *
 * @param params  Test parameters (command, api, thread_mode, warmup_iter are
 *                read here).
 * @param result  Filled by ucx_perf_calc_result() / passed to the thread
 *                spawner.
 *
 * @return UCS_OK on success; UCS_ERR_INVALID_PARAM if no test is selected or
 *         the API is neither UCT nor UCP; UCS_ERR_NO_MEMORY if the context
 *         cannot be allocated; otherwise the status of the failing setup/run
 *         step.
 */
ucs_status_t ucx_perf_run(ucx_perf_params_t *params, ucx_perf_result_t *result)
{
    ucx_perf_context_t *perf;
    ucs_status_t status;

    if (params->command == UCX_PERF_CMD_LAST) {
        ucs_error("Test is not selected");
        status = UCS_ERR_INVALID_PARAM;
        goto out;
    }

    if ((params->api != UCX_PERF_API_UCT) && (params->api != UCX_PERF_API_UCP)) {
        ucs_error("Invalid test API parameter (should be UCT or UCP)");
        status = UCS_ERR_INVALID_PARAM;
        goto out;
    }

    perf = malloc(sizeof(*perf));
    if (perf == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto out;
    }

    ucx_perf_test_reset(perf, params);

    /* params->api indexes the per-API function table */
    status = ucx_perf_funcs[params->api].setup(perf, params);
    if (status != UCS_OK) {
        goto out_free;
    }

    if (UCS_THREAD_MODE_SINGLE == params->thread_mode) {
        if (params->warmup_iter > 0) {
            /* Warm-up pass: run once, synchronize, then reset the context
             * before the measured run */
            ucx_perf_set_warmup(perf, params);
            status = ucx_perf_funcs[params->api].run(perf);
            if (status != UCS_OK) {
                goto out_cleanup;
            }

            rte_call(perf, barrier);
            ucx_perf_test_reset(perf, params);
        }

        /* Run test */
        status = ucx_perf_funcs[params->api].run(perf);
        /* The barrier is issued regardless of the run status */
        rte_call(perf, barrier);
        if (status == UCS_OK) {
            ucx_perf_calc_result(perf, result);
            rte_call(perf, report, result, perf->params.report_arg, 1);
        }
    } else {
        status = ucx_perf_thread_spawn(perf, result);
    }

    /* Labels fall through: cleanup, then free, then return */
out_cleanup:
    ucx_perf_funcs[params->api].cleanup(perf);
out_free:
    free(perf);
out:
    return status;
}
예제 #15
0
/**
 * Drain the SMSG mailbox of an endpoint, dispatching each message as an
 * active message.
 *
 * Loops on GNI_SmsgGetNextWTag() until it reports GNI_RC_NOT_DONE. The global
 * uGNI mutex is held around the GNI calls and released while the active
 * message handler runs. Returns (with the mutex released) on the first GNI
 * error or NULL data pointer.
 *
 * @param iface  SMSG interface that owns the user descriptor and AM table.
 * @param ep     Endpoint whose mailbox is processed.
 */
static void process_mbox(uct_ugni_smsg_iface_t *iface, uct_ugni_smsg_ep_t *ep){
    ucs_status_t status;
    uint8_t tag;
    void *data_ptr;
    gni_return_t ugni_rc;
    uct_ugni_smsg_header_t *header;
    void *user_data;

    pthread_mutex_lock(&uct_ugni_global_lock);

    while(1){
        /* Reset the tag before every fetch so any tag is accepted */
        tag = GNI_SMSG_ANY_TAG;
        ugni_rc = GNI_SmsgGetNextWTag(ep->super.ep, (void **)&data_ptr, &tag);

        /* Yes, GNI_RC_NOT_DONE means that you're done with the smsg mailbox */
        if(GNI_RC_NOT_DONE == ugni_rc){
            pthread_mutex_unlock(&uct_ugni_global_lock);
            return;
        }

        if(GNI_RC_SUCCESS != ugni_rc){
            ucs_error("Unhandled smsg error: %s %d", gni_err_str[ugni_rc], ugni_rc);
            pthread_mutex_unlock(&uct_ugni_global_lock);
            return;
        }

        if(NULL == data_ptr){
            ucs_error("Empty data pointer in smsg.");
            pthread_mutex_unlock(&uct_ugni_global_lock);
            return;
        }

        /* The payload follows the SMSG header; the user-visible descriptor
         * area follows the interface's descriptor header */
        header = (uct_ugni_smsg_header_t *)data_ptr;
        user_data = (void *)(header + 1);
        void *user_desc = iface->user_desc+1;

        uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV,
                           tag, user_data, header->length, "RX: AM");

        /* The handler is invoked without the global lock held */
        pthread_mutex_unlock(&uct_ugni_global_lock);
        status = uct_iface_invoke_am(&iface->super.super, tag, user_data,
                                     header->length, user_desc);
        pthread_mutex_lock(&uct_ugni_global_lock);

        if(status != UCS_OK){
            /* NOTE(review): presumably a non-OK status means the handler kept
             * the descriptor, so a replacement is fetched - confirm */
            uct_recv_desc_iface(user_desc) = &iface->super.super.super;
            UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                                     iface->user_desc, iface->user_desc = NULL);
        }

        ugni_rc = GNI_SmsgRelease(ep->super.ep);
        if(GNI_RC_SUCCESS != ugni_rc){
            ucs_error("Unhandled smsg error in GNI_SmsgRelease: %s %d", gni_err_str[ugni_rc], ugni_rc);
            pthread_mutex_unlock(&uct_ugni_global_lock);
            return;
        }
    }
}
예제 #16
0
파일: ucp_ep.c 프로젝트: alex--m/ucx
/**
 * Allocate a new endpoint with an empty (lane-less) configuration and add it
 * to the worker's endpoint hash under @a dest_uuid.
 *
 * @param worker     Worker that owns the endpoint.
 * @param dest_uuid  UUID of the remote peer; used as the hash key.
 * @param peer_name  Peer name, used in log messages (and stored in the
 *                   endpoint when ENABLE_DEBUG_DATA is set).
 * @param message    Free-form text appended to log messages.
 * @param ep_p       Receives the new endpoint on success.
 *
 * @return UCS_OK on success, UCS_ERR_NO_MEMORY if the endpoint cannot be
 *         allocated, UCS_ERR_NO_RESOURCE if the hash insertion returns the
 *         end iterator (the endpoint is freed in that case).
 */
ucs_status_t ucp_ep_new(ucp_worker_h worker, uint64_t dest_uuid,
                        const char *peer_name, const char *message,
                        ucp_ep_h *ep_p)
{
    ucp_ep_config_key_t cfg_key;
    khiter_t iter;
    ucp_ep_h new_ep;
    int put_ret = 0;

    new_ep = ucs_calloc(1, sizeof(*new_ep), "ucp ep");
    if (new_ep == NULL) {
        ucs_error("Failed to allocate ep");
        return UCS_ERR_NO_MEMORY;
    }

    /* Configuration key describing an endpoint that has no lanes yet */
    memset(&cfg_key, 0, sizeof(cfg_key));
    cfg_key.rma_lane_map     = 0;
    cfg_key.amo_lane_map     = 0;
    cfg_key.reachable_md_map = 0;
    cfg_key.am_lane          = UCP_NULL_RESOURCE;
    cfg_key.rndv_lane        = UCP_NULL_RESOURCE;
    cfg_key.wireup_msg_lane  = UCP_NULL_LANE;
    cfg_key.num_lanes        = 0;
    memset(cfg_key.amo_lanes, UCP_NULL_LANE, sizeof(cfg_key.amo_lanes));

    new_ep->worker    = worker;
    new_ep->dest_uuid = dest_uuid;
    new_ep->cfg_index = ucp_worker_get_ep_config(worker, &cfg_key);
    new_ep->am_lane   = UCP_NULL_LANE;
    new_ep->flags     = 0;
#if ENABLE_DEBUG_DATA
    ucs_snprintf_zero(new_ep->peer_name, UCP_WORKER_NAME_MAX, "%s", peer_name);
#endif

    iter = kh_put(ucp_worker_ep_hash, &worker->ep_hash, dest_uuid, &put_ret);
    if (ucs_unlikely(iter == kh_end(&worker->ep_hash))) {
        ucs_error("Hash failed with ep %p to %s 0x%"PRIx64"->0x%"PRIx64" %s "
                  "with status %d", new_ep, peer_name, worker->uuid,
                  new_ep->dest_uuid, message, put_ret);
        ucs_free(new_ep);
        return UCS_ERR_NO_RESOURCE;
    }
    kh_value(&worker->ep_hash, iter) = new_ep;

    *ep_p = new_ep;
    ucs_debug("created ep %p to %s 0x%"PRIx64"->0x%"PRIx64" %s", new_ep,
              peer_name, worker->uuid, new_ep->dest_uuid, message);
    return UCS_OK;
}
예제 #17
0
/**
 * Pin and map a GPU memory range through gdrcopy and record the result in
 * @a mem_hndl.
 *
 * A zero @a length is accepted: mem_hndl->mh is set to 0 and nothing is
 * pinned. Otherwise the range is pinned, mapped, and queried; on any failure
 * the steps already completed are undone.
 *
 * @param uct_md    Memory domain; treated as uct_gdr_copy_md_t.
 * @param address   Start of the GPU range to register.
 * @param length    Length of the range in bytes.
 * @param flags     Not used by this function.
 * @param mem_hndl  Filled with mh, info, bar_ptr and reg_size on success.
 *
 * @return UCS_OK on success (including zero length), UCS_ERR_IO_ERROR if any
 *         gdrcopy call fails.
 */
static ucs_status_t
uct_gdr_copy_mem_reg_internal(uct_md_h uct_md, void *address, size_t length,
                              unsigned flags, uct_gdr_copy_mem_t *mem_hndl)
{
    uct_gdr_copy_md_t *md = ucs_derived_of(uct_md, uct_gdr_copy_md_t);
    CUdeviceptr d_ptr = ((CUdeviceptr )(char *) address);
    gdr_mh_t mh;
    void *bar_ptr;
    gdr_info_t info;
    int ret;

    if (!length) {
        mem_hndl->mh = 0;
        return UCS_OK;
    }

    ret = gdr_pin_buffer(md->gdrcpy_ctx, d_ptr, length, 0, 0, &mh);
    if (ret) {
        ucs_error("gdr_pin_buffer failed. length :%zu ret:%d", length, ret);
        goto err;
    }

    ret = gdr_map(md->gdrcpy_ctx, mh, &bar_ptr, length);
    if (ret) {
        ucs_error("gdr_map failed. length :%zu ret:%d", length, ret);
        goto unpin_buffer;
    }

    ret = gdr_get_info(md->gdrcpy_ctx, mh, &info);
    if (ret) {
        ucs_error("gdr_get_info failed. ret:%d", ret);
        goto unmap_buffer;
    }

    mem_hndl->mh        = mh;
    mem_hndl->info      = info;
    mem_hndl->bar_ptr   = bar_ptr;
    mem_hndl->reg_size  = length;

    ucs_trace("registered memory:%p..%p length:%zu info.va:0x%"PRIx64" bar_ptr:%p",
              address, (void *)((char *)address + length), length, info.va,
              bar_ptr);

    return UCS_OK;

unmap_buffer:
    /* mem_hndl has not been filled in on this path, so undo the mapping with
     * the local values that gdr_pin_buffer()/gdr_map() actually produced */
    ret = gdr_unmap(md->gdrcpy_ctx, mh, bar_ptr, length);
    if (ret) {
        ucs_warn("gdr_unmap failed. unpin_size:%zu ret:%d", length, ret);
    }
unpin_buffer:
    ret = gdr_unpin_buffer(md->gdrcpy_ctx, mh);
    if (ret) {
        ucs_warn("gdr_unpin_buffer failed. ret;%d", ret);
    }
err:
    return UCS_ERR_IO_ERROR;
}
예제 #18
0
/**
 * Query the transport resources of a protection domain and append the ones
 * enabled by the configuration to context->tl_rscs.
 *
 * @param context          UCP context whose tl_rscs array and num_tls
 *                         counter are extended.
 * @param pd               Protection domain to query.
 * @param pd_index         Index recorded with every added resource.
 * @param config           User configuration used to filter resources.
 * @param num_resources_p  Receives the number of resources added (0 on
 *                         failure or when nothing was found).
 * @param masks            Passed through to ucp_is_resource_enabled().
 *
 * @return UCS_OK on success (including when the domain has no resources),
 *         the query status if the query fails, UCS_ERR_NO_MEMORY if the
 *         array cannot be grown.
 */
static ucs_status_t ucp_add_tl_resources(ucp_context_h context,
                                         uct_pd_h pd, ucp_rsc_index_t pd_index,
                                         const ucp_config_t *config,
                                         unsigned *num_resources_p,
                                         uint64_t *masks)
{
    uct_tl_resource_desc_t *found;
    ucp_tl_resource_desc_t *grown;
    unsigned num_found;
    ucs_status_t status;
    ucp_rsc_index_t idx;

    *num_resources_p = 0;

    /* Ask the domain which transport resources it offers */
    status = uct_pd_query_tl_resources(pd, &found, &num_found);
    if (status != UCS_OK) {
        ucs_error("Failed to query resources: %s", ucs_status_string(status));
        return status;
    }

    if (num_found == 0) {
        ucs_debug("No tl resources found for pd %s", context->pd_rscs[pd_index].pd_name);
        uct_release_tl_resource_list(found);
        return UCS_OK;
    }

    /* Make room for the worst case: every queried resource is enabled */
    grown = ucs_realloc(context->tl_rscs,
                        sizeof(*context->tl_rscs) * (context->num_tls + num_found),
                        "ucp resources");
    if (grown == NULL) {
        ucs_error("Failed to allocate resources");
        uct_release_tl_resource_list(found);
        return UCS_ERR_NO_MEMORY;
    }
    context->tl_rscs = grown;

    /* Keep only the resources the user configuration enables */
    for (idx = 0; idx < num_found; ++idx) {
        if (!ucp_is_resource_enabled(&found[idx], config, masks)) {
            continue;
        }

        context->tl_rscs[context->num_tls].tl_rsc   = found[idx];
        context->tl_rscs[context->num_tls].pd_index = pd_index;
        ++context->num_tls;
        ++(*num_resources_p);
    }

    uct_release_tl_resource_list(found);
    return UCS_OK;
}
예제 #19
0
파일: async.c 프로젝트: openucx/ucx
/**
 * Insert a handler into the global async handler hash under a free id chosen
 * from the range [min_id, max_id).
 *
 * Up to (max_id - min_id) candidate ids are tried; each candidate is derived
 * from the global handler_id counter, which is atomically incremented per
 * attempt. The global handlers write lock is held for the whole call.
 *
 * @param min_id   Lowest id that may be assigned.
 * @param max_id   One past the highest id that may be assigned.
 * @param handler  Handler to add; its refcount must be 1. handler->id is set
 *                 to the chosen id, or left at -1 on failure.
 *
 * @return UCS_OK when inserted, UCS_ERR_NO_MEMORY if the hash insertion
 *         reported a failure (-1), UCS_ERR_ALREADY_EXISTS if every attempted
 *         id was already taken.
 */
static ucs_status_t ucs_async_handler_add(int min_id, int max_id,
                                          ucs_async_handler_t *handler)
{
    ucs_status_t status;
    khiter_t iter;
    int put_ret;
    int attempt, candidate;

    pthread_rwlock_wrlock(&ucs_async_global_context.handlers_lock);

    handler->id = -1;
    ucs_assert_always(handler->refcount == 1);

    /* One attempt per slot in the range; the loop index only counts tries */
    for (attempt = min_id; attempt < max_id; ++attempt) {
        candidate = min_id +
                    (ucs_atomic_fadd32(&ucs_async_global_context.handler_id, 1) %
                     (max_id - min_id));
        iter = kh_put(ucs_async_handler, &ucs_async_global_context.handlers,
                      candidate, &put_ret);
        if (put_ret == 0) {
            /* Key already present in the hash - try the next candidate */
            continue;
        }

        if (put_ret == -1) {
            ucs_error("Failed to add async handler " UCS_ASYNC_HANDLER_FMT
                      " to hash", UCS_ASYNC_HANDLER_ARG(handler));
            status = UCS_ERR_NO_MEMORY;
            goto out_unlock;
        }

        handler->id = candidate;
        ucs_assert(candidate != -1);
        break;
    }

    if (handler->id == -1) {
        ucs_error("Cannot add async handler %s() - id range [%d..%d) is full",
                  ucs_debug_get_symbol_name(handler->cb), min_id, max_id);
        status = UCS_ERR_ALREADY_EXISTS;
        goto out_unlock;
    }

    ucs_assert_always(!ucs_async_handler_kh_is_end(iter));
    kh_value(&ucs_async_global_context.handlers, iter) = handler;
    ucs_debug("added async handler " UCS_ASYNC_HANDLER_FMT " to hash",
              UCS_ASYNC_HANDLER_ARG(handler));
    status = UCS_OK;

out_unlock:
    pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock);
    return status;
}
예제 #20
0
파일: sys.c 프로젝트: ornl-languages/ucx
static uint64_t ucs_get_mac_address()
{
    static uint64_t mac_address = 0;
    struct ifreq ifr, *it, *end;
    struct ifconf ifc;
    char buf[1024];
    int sock;

    if (mac_address == 0) {
        sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
        if (sock == -1) {
            ucs_error("failed to create socket: %m");
            return 0;
        }

        ifc.ifc_len = sizeof(buf);
        ifc.ifc_buf = buf;
        if (ioctl(sock, SIOCGIFCONF, &ifc) == -1) {
            ucs_error("ioctl(SIOCGIFCONF) failed: %m");
            close(sock);
            return 0;
        }

        it = ifc.ifc_req;
        end = it + (ifc.ifc_len / sizeof *it);
        for (it = ifc.ifc_req; it != end; ++it) {
            strcpy(ifr.ifr_name, it->ifr_name);
            if (ioctl(sock, SIOCGIFFLAGS, &ifr) != 0) {
                ucs_error("ioctl(SIOCGIFFLAGS) failed: %m");
                close(sock);
                return 0;
            }

            if (!(ifr.ifr_flags & IFF_LOOPBACK)) {
                if (ioctl(sock, SIOCGIFHWADDR, &ifr) != 0) {
                    ucs_error("ioctl(SIOCGIFHWADDR) failed: %m");
                    close(sock);
                    return 0;
                }

                memcpy(&mac_address, ifr.ifr_hwaddr.sa_data, 6);
                break;
            }
        }

        close(sock);
        ucs_trace("MAC address is 0x%012"PRIX64, mac_address);
    }

    return mac_address;
}
예제 #21
0
파일: libperf.c 프로젝트: xinzhao3/ucx
/**
 * Translate performance-test parameters into UCP context parameters.
 *
 * Sets the UCP feature bits (and, for tag/stream tests, the request size)
 * that the selected command needs, then validates the generic test
 * parameters.
 *
 * @param params      Test parameters; command and flags are read here.
 * @param ucp_params  UCP parameters to update (features, field_mask,
 *                    request_size).
 *
 * @return UCS_OK on success; UCS_ERR_INVALID_PARAM for an unknown command or
 *         an atomic test whose message size is neither 4 nor 8 bytes;
 *         otherwise the status of ucx_perf_test_check_params().
 */
static ucs_status_t ucp_perf_test_fill_params(ucx_perf_params_t *params,
                                               ucp_params_t *ucp_params)
{
    /* A byte count, not a status code: it is compared against sizeof below */
    size_t message_size;

    message_size = ucx_perf_get_message_size(params);
    switch (params->command) {
    case UCX_PERF_CMD_PUT:
    case UCX_PERF_CMD_GET:
        ucp_params->features |= UCP_FEATURE_RMA;
        break;
    case UCX_PERF_CMD_ADD:
    case UCX_PERF_CMD_FADD:
    case UCX_PERF_CMD_SWAP:
    case UCX_PERF_CMD_CSWAP:
        /* Atomic operations exist only for 32-bit and 64-bit operands */
        if (message_size == sizeof(uint32_t)) {
            ucp_params->features |= UCP_FEATURE_AMO32;
        } else if (message_size == sizeof(uint64_t)) {
            ucp_params->features |= UCP_FEATURE_AMO64;
        } else {
            if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
                ucs_error("Atomic size should be either 32 or 64 bit");
            }
            return UCS_ERR_INVALID_PARAM;
        }

        break;
    case UCX_PERF_CMD_TAG:
        ucp_params->features    |= UCP_FEATURE_TAG;
        ucp_params->field_mask  |= UCP_PARAM_FIELD_REQUEST_SIZE;
        ucp_params->request_size = sizeof(ucp_perf_request_t);
        break;
    case UCX_PERF_CMD_STREAM:
        ucp_params->features    |= UCP_FEATURE_STREAM;
        ucp_params->field_mask  |= UCP_PARAM_FIELD_REQUEST_SIZE;
        ucp_params->request_size = sizeof(ucp_perf_request_t);
        break;
    default:
        if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) {
            ucs_error("Invalid test command");
        }
        return UCS_ERR_INVALID_PARAM;
    }

    return ucx_perf_test_check_params(params);
}
예제 #22
0
파일: ugni_iface.c 프로젝트: hppritcha/ucx
static ucs_status_t uct_ugni_fetch_pmi()
{
    int spawned = 0,
        rc;

    if(job_info.initialized) {
        return UCS_OK;
    }

    /* Fetch information from Cray's PMI */
    rc = PMI_Init(&spawned);
    if (PMI_SUCCESS != rc) {
        ucs_error("PMI_Init failed, Error status: %d", rc);
        return UCS_ERR_IO_ERROR;
    }
    ucs_debug("PMI spawned %d", spawned);

    rc = PMI_Get_size(&job_info.pmi_num_of_ranks);
    if (PMI_SUCCESS != rc) {
        ucs_error("PMI_Get_size failed, Error status: %d", rc);
        return UCS_ERR_IO_ERROR;
    }
    ucs_debug("PMI size %d", job_info.pmi_num_of_ranks);

    rc = PMI_Get_rank(&job_info.pmi_rank_id);
    if (PMI_SUCCESS != rc) {
        ucs_error("PMI_Get_rank failed, Error status: %d", rc);
        return UCS_ERR_IO_ERROR;
    }
    ucs_debug("PMI rank %d", job_info.pmi_rank_id);

    rc = get_ptag(&job_info.ptag);
    if (UCS_OK != rc) {
        ucs_error("get_ptag failed, Error status: %d", rc);
        return rc;
    }
    ucs_debug("PMI ptag %d", job_info.ptag);

    rc = get_cookie(&job_info.cookie);
    if (UCS_OK != rc) {
        ucs_error("get_cookie failed, Error status: %d", rc);
        return rc;
    }
    ucs_debug("PMI cookie %d", job_info.cookie);

    /* Context and domain is activated */
    job_info.initialized = true;
    ucs_debug("UGNI job info was activated");
    return UCS_OK;
}
예제 #23
0
/**
 * Register a GPU memory range with gdrcopy and return a newly allocated
 * handle for it.
 *
 * The range is widened to GPU page boundaries before being registered: the
 * start is rounded down and the end is rounded up, so the registered region
 * always covers [address, address + length) even when @a address is not
 * page-aligned. A zero @a length registers nothing.
 *
 * @param uct_md   Memory domain passed to the internal registration routine.
 * @param address  Start of the GPU range.
 * @param length   Length of the range in bytes.
 * @param flags    Not forwarded; the internal routine is called with 0.
 * @param memh_p   Receives the allocated handle on success.
 *
 * @return UCS_OK on success, UCS_ERR_NO_MEMORY if the handle cannot be
 *         allocated, otherwise the status of the internal registration.
 */
static ucs_status_t uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t length,
                                         unsigned flags, uct_mem_h *memh_p)
{
    uct_gdr_copy_mem_t *mem_hndl = NULL;
    uintptr_t start, end;
    size_t reg_size;
    ucs_status_t status;

    mem_hndl = ucs_malloc(sizeof(uct_gdr_copy_mem_t), "gdr_copy handle");
    if (NULL == mem_hndl) {
        ucs_error("failed to allocate memory for gdr_copy_mem_t");
        return UCS_ERR_NO_MEMORY;
    }

    /* Assumes GPU_PAGE_MASK clears the in-page offset bits, as the rounding
     * in the previous revision of this function already relied on */
    start = (uintptr_t)address & GPU_PAGE_MASK;
    if (length == 0) {
        /* Keep the "nothing to register" case at size zero */
        reg_size = 0;
    } else {
        /* Round the END of the buffer up, not just its length: the offset of
         * address within its page must be covered as well */
        end      = ((uintptr_t)address + length + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
        reg_size = end - start;
    }

    status = uct_gdr_copy_mem_reg_internal(uct_md, (void *)start, reg_size, 0, mem_hndl);
    if (status != UCS_OK) {
        ucs_free(mem_hndl);
        return status;
    }

    *memh_p = mem_hndl;
    return UCS_OK;
}
예제 #24
0
파일: ucp_ep.c 프로젝트: bbenton/ucx
/**
 * Allocate a zero-initialized endpoint, set its initial fields and insert it
 * into the worker's endpoint hash.
 *
 * @param worker     Worker that owns the new endpoint.
 * @param dest_uuid  UUID stored in the endpoint as its destination.
 * @param peer_name  Peer name; copied into the endpoint only when
 *                   ENABLE_DEBUG_DATA is set, and always used for logging.
 * @param message    Free-form text appended to the debug log line.
 * @param ep_p       Receives the new endpoint on success.
 *
 * @return UCS_OK on success, UCS_ERR_NO_MEMORY if allocation fails.
 */
static ucs_status_t ucp_ep_new(ucp_worker_h worker, uint64_t dest_uuid,
                               const char *peer_name, const char *message,
                               ucp_ep_h *ep_p)
{
    ucp_ep_h new_ep = ucs_calloc(1, sizeof(*new_ep), "ucp ep");

    if (NULL == new_ep) {
        ucs_error("Failed to allocate ep");
        return UCS_ERR_NO_MEMORY;
    }

    /* Identity of the endpoint */
    new_ep->worker      = worker;
    new_ep->dest_uuid   = dest_uuid;

    /* No RMA/AMO destination resource selected yet */
    new_ep->rma_dst_pdi = UCP_NULL_RESOURCE;
    new_ep->amo_dst_pdi = UCP_NULL_RESOURCE;

    new_ep->cfg_index   = 0;
    new_ep->flags       = 0;

#if ENABLE_DEBUG_DATA
    ucs_snprintf_zero(new_ep->peer_name, UCP_WORKER_NAME_MAX, "%s", peer_name);
#endif

    sglib_hashed_ucp_ep_t_add(worker->ep_hash, new_ep);
    *ep_p = new_ep;

    ucs_debug("created ep %p to %s 0x%"PRIx64"->0x%"PRIx64" %s", new_ep, peer_name,
              worker->uuid, new_ep->dest_uuid, message);
    return UCS_OK;
}
예제 #25
0
/**
 * Poll the interface's local completion queue once and retire the send
 * descriptor that the returned event refers to.
 *
 * @param iface  SMSG interface whose local CQ is polled.
 *
 * @return UCS_OK if the queue had no event (GNI_RC_NOT_DONE),
 *         UCS_ERR_NO_RESOURCE if the poll failed or reported an overrun,
 *         UCS_INPROGRESS after one completion has been processed.
 */
static ucs_status_t progress_local_cq(uct_ugni_smsg_iface_t *iface)
{
    uct_ugni_smsg_desc_t *message_pointer;
    uct_ugni_smsg_desc_t lookup_key;
    gni_cq_entry_t cq_event;
    gni_return_t rc;

    rc = GNI_CqGetEvent(iface->super.local_cq, &cq_event);
    if (rc == GNI_RC_NOT_DONE) {
        /* Nothing completed since the last poll */
        return UCS_OK;
    }

    if (((rc != GNI_RC_SUCCESS) && !cq_event) || GNI_CQ_OVERRUN(cq_event)) {
        /* TODO: handle overruns */
        ucs_error("Error posting data. CQ overrun = %d", (int)GNI_CQ_OVERRUN(cq_event));
        return UCS_ERR_NO_RESOURCE;
    }

    /* Find the outstanding descriptor by the message id carried in the event */
    lookup_key.msg_id = GNI_CQ_GET_MSG_ID(cq_event);
    message_pointer   = sglib_hashed_uct_ugni_smsg_desc_t_find_member(iface->smsg_list,
                                                                      &lookup_key);
    ucs_assert(NULL != message_pointer);

    /* One fewer operation in flight on both the endpoint and the interface */
    message_pointer->ep->outstanding--;
    iface->super.outstanding--;
    uct_ugni_ep_check_flush(message_pointer->ep);

    /* Unlink the descriptor before handing it back to its memory pool */
    sglib_hashed_uct_ugni_smsg_desc_t_delete(iface->smsg_list, message_pointer);
    ucs_mpool_put(message_pointer);

    return UCS_INPROGRESS;
}
예제 #26
0
/**
 * Poll the interface's remote completion queue once and process the mailbox
 * of the endpoint that the returned event refers to.
 *
 * @param iface  SMSG interface whose remote CQ is polled.
 *
 * @return UCS_OK if the queue had no event, or if an overrun was detected and
 *         passed to uct_ugni_smsg_handle_remote_overflow();
 *         UCS_ERR_IO_ERROR on any other failure;
 *         UCS_INPROGRESS after one event has been processed.
 */
ucs_status_t progress_remote_cq(uct_ugni_smsg_iface_t *iface)
{
    uct_ugni_smsg_ep_t *smsg_ep;
    uct_ugni_ep_t *found_ep;
    uct_ugni_ep_t lookup_key;
    gni_cq_entry_t cq_event;
    gni_return_t rc;
    int overrun;

    rc = GNI_CqGetEvent(iface->remote_cq, &cq_event);
    if (rc == GNI_RC_NOT_DONE) {
        return UCS_OK;
    }

    /*
     * An overrun is reported either through the return code or, on a
     * successful poll, through the overrun bit of the event itself.
     */
    overrun = (rc == GNI_RC_ERROR_RESOURCE) ||
              ((rc == GNI_RC_SUCCESS) && GNI_CQ_OVERRUN(cq_event));
    if (overrun) {
        ucs_debug("Detected remote CQ overrun. ungi_rc = %d [%s]", rc, gni_err_str[rc]);
        uct_ugni_smsg_handle_remote_overflow(iface);
        return UCS_OK;
    }

    if ((rc != GNI_RC_SUCCESS) || !GNI_CQ_STATUS_OK(cq_event) || GNI_CQ_OVERRUN(cq_event)) {
        ucs_error("GNI_CqGetEvent falied with unhandled error. Error status %s %d ",
                  gni_err_str[rc], rc);
        return UCS_ERR_IO_ERROR;
    }

    /* The instance id in the event is the hash key of the target endpoint */
    lookup_key.hash_key = GNI_CQ_GET_INST_ID(cq_event);
    found_ep            = sglib_hashed_uct_ugni_ep_t_find_member(iface->super.eps,
                                                                 &lookup_key);
    smsg_ep             = ucs_derived_of(found_ep, uct_ugni_smsg_ep_t);

    process_mbox(iface, smsg_ep);
    return UCS_INPROGRESS;
}
예제 #27
0
파일: ud_mlx5.c 프로젝트: tonycurtis/ucx
/*
 * Refill the receive work queue of the interface.
 *
 * Writes up to config.rx_max_batch data segments, starting at the current
 * producer index (rq_wqe_counter masked by the queue mask). Afterwards it
 * advances rq_wqe_counter by the number of entries written, reduces
 * rx.available by the same amount and stores the new counter, in network
 * byte order, to *dbrec (presumably the doorbell record - confirm against
 * the mlx5 queue definitions).
 *
 * If not even one entry could be written, an error is logged and the
 * counter, rx.available and *dbrec are left unchanged.
 */
static UCS_F_NOINLINE void
uct_ud_mlx5_iface_post_recv(uct_ud_mlx5_iface_t *iface)
{
    unsigned batch = iface->super.config.rx_max_batch;
    struct mlx5_wqe_data_seg *rx_wqes;
    uint16_t pi, next_pi, count;
    uct_ib_iface_recv_desc_t *desc;

    rx_wqes = iface->rx.wq.wqes;
    /* Producer index: the running counter wrapped to the queue size */
    pi      = iface->rx.wq.rq_wqe_counter & iface->rx.wq.mask;

    for (count = 0; count < batch; count ++) {
        next_pi = (pi + 1) &  iface->rx.wq.mask;
        /* Prefetch the entry that the next iteration will write */
        ucs_prefetch(rx_wqes + next_pi);
        /*
         * NOTE(review): the macro receives `break` as its last argument, so
         * it presumably leaves this loop when no descriptor can be taken
         * from the rx memory pool - confirm against the macro definition.
         */
        UCT_TL_IFACE_GET_RX_DESC(&iface->super.super.super, &iface->super.rx.mp,
                                 desc, break);
        /* Key and address are stored in network byte order */
        rx_wqes[pi].lkey = htonl(desc->lkey);
        rx_wqes[pi].addr = htonll((uintptr_t)uct_ib_iface_recv_desc_hdr(&iface->super.super, desc));
        pi = next_pi;
    }
    if (ucs_unlikely(count == 0)) {
        ucs_error("iface(%p) failed to post receive wqes", iface);
        return;
    }
    /* From here on, pi holds the new unmasked counter value, not an index */
    pi = iface->rx.wq.rq_wqe_counter + count;
    iface->rx.wq.rq_wqe_counter = pi;
    iface->super.rx.available -= count;
    /* Fence placed between the entry writes above and the dbrec store below */
    ucs_memory_cpu_fence();
    *iface->rx.wq.dbrec = htonl(pi);
}
예제 #28
0
/**
 * Pending-queue callback that sends one wireup active message.
 *
 * A wireup REQUEST is not sent when the endpoint is already flagged as
 * remotely connected; the request is then completed right away. ACK messages
 * go out on the endpoint's AM lane, every other type on the wireup-message
 * lane.
 *
 * @param self  Pending request embedded in a ucp_request_t (send.uct member).
 *
 * @return UCS_OK once the request has been completed, or the negative status
 *         returned by uct_ep_am_bcopy(). Every such status except
 *         UCS_ERR_NO_RESOURCE is also logged as an error.
 */
ucs_status_t ucp_wireup_msg_progress(uct_pending_req_t *self)
{
    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
    ucp_ep_h ep        = req->send.ep;
    ssize_t packed_len;

    if ((req->send.wireup.type == UCP_WIREUP_MSG_REQUEST) &&
        (ep->flags & UCP_EP_FLAG_REMOTE_CONNECTED)) {
        ucs_trace("ep %p: not sending wireup message - remote already connected",
                  ep);
        goto out;
    }

    /* Pick the lane according to the message type, then send */
    req->send.lane = (req->send.wireup.type == UCP_WIREUP_MSG_ACK) ?
                     ucp_ep_get_am_lane(ep) :
                     ucp_ep_get_wireup_msg_lane(ep);

    packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], UCP_AM_ID_WIREUP,
                                 ucp_wireup_msg_pack, req);
    if (packed_len >= 0) {
        goto out;
    }

    /* A negative length is a status code; NO_RESOURCE is not logged */
    if (packed_len != UCS_ERR_NO_RESOURCE) {
        ucs_error("failed to send wireup: %s", ucs_status_string(packed_len));
    }
    return (ucs_status_t)packed_len;

out:
    ucp_request_complete_send(req, UCS_OK);
    return UCS_OK;
}
예제 #29
0
파일: ugni_iface.c 프로젝트: hjelmn/ucx
/**
 * Build an array of transport resource descriptors, one per device listed
 * in job_info.
 *
 * The device count and device array are read from job_info only while
 * uct_ugni_global_lock is held, and that single snapshot is used for the
 * allocation size, the fill loop and the reported count, so the three can
 * never disagree with each other.
 *
 * @param pd               Not used by this function.
 * @param tl_name          Transport name passed to
 *                         uct_ugni_device_get_resource() for every entry.
 * @param resource_p       Receives the allocated array, or NULL on failure.
 * @param num_resources_p  Receives the number of entries, or 0 on failure.
 *
 * @return UCS_OK on success, UCS_ERR_NO_MEMORY if the array cannot be
 *         allocated.
 */
ucs_status_t uct_ugni_query_tl_resources(uct_pd_h pd, const char *tl_name,
                                         uct_tl_resource_desc_t **resource_p,
                                         unsigned *num_resources_p)
{
    uct_tl_resource_desc_t *resources;
    uct_ugni_device_t *devs;
    ucs_status_t status = UCS_OK;
    int num_devices;
    int i;

    pthread_mutex_lock(&uct_ugni_global_lock);

    /* Take the snapshot under the lock rather than in the initializers */
    num_devices = job_info.num_devices;
    devs        = job_info.devices;

    resources = ucs_calloc(num_devices, sizeof(*resources), "resource desc");
    if (NULL == resources) {
        ucs_error("Failed to allocate memory");
        num_devices = 0;
        status      = UCS_ERR_NO_MEMORY;
        goto out;
    }

    for (i = 0; i < num_devices; i++) {
        uct_ugni_device_get_resource(tl_name, &devs[i], &resources[i]);
    }

out:
    /* Outputs are written on both paths: (NULL, 0) when allocation failed */
    *num_resources_p = num_devices;
    *resource_p      = resources;
    pthread_mutex_unlock(&uct_ugni_global_lock);

    return status;
}
예제 #30
0
/**
 * Post an RDMA descriptor on an endpoint and account for it as outstanding.
 *
 * On every failure path the descriptor is handed back through
 * ucs_mpool_put(), so the caller must not touch it after a non-INPROGRESS
 * return.
 *
 * @param iface  RDMA interface; its outstanding counter is incremented on
 *               success.
 * @param ep     Endpoint to post on; its outstanding counter is incremented
 *               on success.
 * @param rdma   Descriptor whose 'desc' member is passed to GNI_PostRdma().
 *
 * @return UCS_INPROGRESS if the operation was posted,
 *         UCS_ERR_NO_RESOURCE if the endpoint cannot send right now or GNI
 *         reported GNI_RC_ERROR_RESOURCE / GNI_RC_ERROR_NOMEM,
 *         UCS_ERR_IO_ERROR for any other GNI failure.
 */
static inline ucs_status_t uct_ugni_post_rdma(uct_ugni_rdma_iface_t *iface,
                                              uct_ugni_ep_t *ep,
                                              uct_ugni_base_desc_t *rdma)
{
    gni_return_t rc;

    if (ucs_unlikely(!uct_ugni_can_send(ep))) {
        ucs_mpool_put(rdma);
        return UCS_ERR_NO_RESOURCE;
    }

    rc = GNI_PostRdma(ep->ep, &rdma->desc);
    if (ucs_unlikely(rc != GNI_RC_SUCCESS)) {
        ucs_mpool_put(rdma);

        /* Anything other than a resource/memory shortage is a hard error */
        if ((rc != GNI_RC_ERROR_RESOURCE) && (rc != GNI_RC_ERROR_NOMEM)) {
            ucs_error("GNI_PostRdma failed, Error status: %s %d",
                      gni_err_str[rc], rc);
            return UCS_ERR_IO_ERROR;
        }

        ucs_debug("GNI_PostRdma failed, Error status: %s %d",
                  gni_err_str[rc], rc);
        return UCS_ERR_NO_RESOURCE;
    }

    ep->outstanding++;
    iface->super.outstanding++;

    return UCS_INPROGRESS;
}