Beispiel #1
0
/*
 * Create a uGNI endpoint on the device owned by `cep`, attach it to the
 * completion queue `cq`, and bind it to the remote peer described by
 * cep->ep_rem_addr / cep->ep_rem_id.
 *
 * Every uGNI call is issued with the device lock (cep->dev->dev_lock) held;
 * the lock is released between the individual calls.
 *
 * Returns OPAL_SUCCESS once the endpoint is created and bound,
 * OPAL_ERR_BAD_PARAM when `cep` is NULL (an assertion also fires when
 * assertions are enabled), or the OPAL translation of the failing uGNI
 * return code. If binding fails the freshly created endpoint is destroyed
 * before returning.
 */
int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_t cq,
                                gni_ep_handle_t *ep_handle)
{
    gni_return_t status;

    if (OPAL_UNLIKELY(NULL == cep)) {
        assert (0);
        return OPAL_ERR_BAD_PARAM;
    }

    /* step 1: allocate the endpoint handle */
    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    status = GNI_EpCreate (cep->dev->dev_handle, cq, ep_handle);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status)) {
        return opal_common_rc_ugni_to_opal (status);
    }

    /* step 2: bind the new handle to the remote peer */
    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    status = GNI_EpBind (*ep_handle, cep->ep_rem_addr, cep->ep_rem_id);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);

    if (GNI_RC_SUCCESS == status) {
        return OPAL_SUCCESS;
    }

    /* binding failed: do not leave a half-initialized endpoint behind */
    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    GNI_EpDestroy (*ep_handle);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);

    return opal_common_rc_ugni_to_opal (status);
}
Beispiel #2
0
/*
 * Endpoint definition: constructor for uct_ugni_ep_t.
 *
 * Initializes the base endpoint, obtains a flush group from the interface,
 * creates the GNI endpoint on the interface's NIC handle and local CQ while
 * holding the CDM lock, initializes the arbiter group, derives the hash key
 * from the first 32 bits of the endpoint handle storage, and adds the
 * endpoint to the interface's endpoint hash.
 *
 * Returns UCS_OK on success or UCS_ERR_NO_DEVICE when GNI_EpCreate fails.
 */
UCS_CLASS_INIT_FUNC(uct_ugni_ep_t, const uct_ep_params_t *params)
{
    uct_ugni_iface_t *iface = ucs_derived_of(params->iface, uct_ugni_iface_t);
    gni_return_t ugni_rc;
    uint32_t *big_hash;

    self->arb_sched = 0;
    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super);
    self->flush_group = uct_ugni_new_flush_group(iface);
#ifdef DEBUG
    self->flush_group->flush_comp.func = NULL;
    self->flush_group->parent = NULL;
#endif
    uct_ugni_cdm_lock(&iface->cdm);
    ugni_rc = GNI_EpCreate(uct_ugni_iface_nic_handle(iface), iface->local_cq, &self->ep);
    uct_ugni_cdm_unlock(&iface->cdm);
    if (GNI_RC_SUCCESS != ugni_rc) {
        /* The failing call is GNI_EpCreate; report it under its real name. */
        ucs_error("GNI_EpCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        /* NOTE(review): the flush group obtained above is not handed back on
         * this path - confirm whether it has to be released here. */
        return UCS_ERR_NO_DEVICE;
    }
    ucs_arbiter_group_init(&self->arb_group);

    /* The hash key is the first 32-bit word of the endpoint handle storage. */
    big_hash = (void *)&self->ep;
    self->hash_key = big_hash[0];
    if (uct_ugni_check_device_type(iface, GNI_DEVICE_ARIES)) {
        /* On Aries devices only the low 24 bits of the key are kept. */
        self->hash_key &= 0x00FFFFFF;
    }
    ucs_debug("Adding ep hash %x to iface %p", self->hash_key, iface);
    sglib_hashed_uct_ugni_ep_t_add(iface->eps, self);

    return UCS_OK;
}
Beispiel #3
0
/*
 * Endpoint definition: constructor for uct_ugni_ep_t.
 *
 * Initializes the base endpoint, creates the GNI endpoint on the interface's
 * NIC handle and local CQ and, when `addr` is non-NULL, connects it to that
 * address via ugni_connect_ep(). On success the arbiter group is initialized,
 * the hash key is taken from the first 32 bits of the endpoint handle storage
 * and the endpoint is added to the interface's endpoint hash.
 *
 * Returns UCS_OK on success, UCS_ERR_NO_DEVICE when GNI_EpCreate fails, or
 * the status returned by ugni_connect_ep() when the connection fails. In the
 * latter case the GNI endpoint is destroyed and the endpoint is NOT added to
 * the hash, so a failed constructor leaves no reference to `self` behind.
 */
UCS_CLASS_INIT_FUNC(uct_ugni_ep_t, uct_iface_t *tl_iface,
                    const struct sockaddr *addr)
{
    uct_ugni_iface_t *iface = ucs_derived_of(tl_iface, uct_ugni_iface_t);
    const uct_sockaddr_ugni_t *iface_addr = (const uct_sockaddr_ugni_t*)addr;
    ucs_status_t rc = UCS_OK;
    gni_return_t ugni_rc;
    uint32_t *big_hash;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super);

    ugni_rc = GNI_EpCreate(iface->nic_handle, iface->local_cq, &self->ep);
    if (GNI_RC_SUCCESS != ugni_rc) {
        /* The failing call is GNI_EpCreate; report it under its real name. */
        ucs_error("GNI_EpCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        return UCS_ERR_NO_DEVICE;
    }

    if (NULL != addr) {
        rc = ugni_connect_ep(iface, iface_addr, self);
        if (UCS_OK != rc) {
            /* Undo the endpoint creation and bail out before the endpoint is
             * published in the interface hash. */
            ugni_rc = GNI_EpDestroy(self->ep);
            if (GNI_RC_SUCCESS != ugni_rc) {
                ucs_error("GNI_EpDestroy failed, Error status: %s %d",
                          gni_err_str[ugni_rc], ugni_rc);
            }
            return rc;
        }
    }

    ucs_arbiter_group_init(&self->arb_group);

    /* The hash key is the first 32-bit word of the endpoint handle storage. */
    big_hash = (void *)&self->ep;
    self->hash_key = big_hash[0];
    sglib_hashed_uct_ugni_ep_t_add(iface->eps, self);

    return rc;
}
Beispiel #4
0
/*
 * Initialize an endpoint handle: record the owning device, create a uGNI
 * endpoint on that device attached to `cq`, and bind it to the remote peer
 * described by ep->ep_rem_addr / ep->ep_rem_id.
 *
 * Returns the OPAL translation of the last uGNI return code. When binding
 * fails, the endpoint that was just created is destroyed and the stored
 * handle is cleared, so the failure path neither leaks the endpoint nor
 * leaves a stale handle in `ep_handle`.
 */
int mca_btl_ugni_ep_handle_init (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
                                 mca_btl_ugni_device_t *device, mca_btl_ugni_endpoint_handle_t *ep_handle)
{
    gni_return_t grc;

    ep_handle->device = device;

    /* create a uGNI endpoint handle and bind it to the remote peer */
    grc = GNI_EpCreate (device->dev_handle, cq, &ep_handle->gni_handle);
    if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
        grc = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id);
        if (GNI_RC_SUCCESS != grc) {
            /* report the bind error, not the result of the cleanup call */
            (void) GNI_EpDestroy (ep_handle->gni_handle);
            ep_handle->gni_handle = 0;
        }
    }

    return mca_btl_rc_ugni_to_opal (grc);
}
/*
 * Bind a uGNI BTL module to a device.
 *
 * Copies the module defaults (including function pointers) from the
 * mca_btl_ugni_module template, resets the module's counters, constructs its
 * lists, free lists, pointer arrays and hash table, links module and device
 * to each other, creates the unbound wildcard endpoint and posts the wildcard
 * datagram.
 *
 * Returns OMPI_SUCCESS on success, the OMPI translation of the uGNI error
 * when the wildcard endpoint cannot be created, or the error returned by
 * mca_btl_ugni_wildcard_ep_post().
 */
int
mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
                          ompi_common_ugni_device_t *dev)
{
    gni_return_t grc;
    int rc;

    BTL_VERBOSE(("binding module %p to device %p", (void *) ugni_module,
                 (void *) dev));

    /* copy module defaults (and function pointers) */
    memmove (ugni_module, &mca_btl_ugni_module, sizeof (mca_btl_ugni_module));

    ugni_module->initialized = false;
    ugni_module->nlocal_procs = 0;
    ugni_module->active_send_count = 0;

    OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
    OBJ_CONSTRUCT(&ugni_module->eager_frags_send, ompi_free_list_t);
    OBJ_CONSTRUCT(&ugni_module->eager_frags_recv, ompi_free_list_t);
    OBJ_CONSTRUCT(&ugni_module->smsg_frags, ompi_free_list_t);
    OBJ_CONSTRUCT(&ugni_module->rdma_frags, ompi_free_list_t);
    OBJ_CONSTRUCT(&ugni_module->rdma_int_frags, ompi_free_list_t);
    OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
    OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
    OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
    OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);

    ugni_module->device = dev;
    dev->btl_ctx = (void *) ugni_module;

    /* create wildcard endpoint to listen for connections.
     * there is no need to bind this endpoint. */
    grc = GNI_EpCreate (ugni_module->device->dev_handle, NULL,
                        &ugni_module->wildcard_ep);
    /* uGNI calls report gni_return_t values; compare against the uGNI
     * success code rather than the OMPI one. */
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
        BTL_ERROR(("error creating wildcard ugni endpoint"));
        return ompi_common_rc_ugni_to_ompi (grc);
    }

    /* post wildcard datagram */
    rc = mca_btl_ugni_wildcard_ep_post (ugni_module);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        BTL_ERROR(("error posting wildcard datagram"));
        return rc;
    }

    return OMPI_SUCCESS;
}
Beispiel #6
0
/*
 * Post a cancel datagram (id UCT_UGNI_UDT_CANCEL) to this interface's own
 * address through a temporary endpoint, then destroy that endpoint again.
 * All GNI calls are made while holding the device lock on iface->super.cdm.
 *
 * Preconditions - before this function is called, you MUST:
 *  A) Deregister the datagram processing function from the async thread.
 *  B) Cancel the wildcard datagram.
 *  C) Drain all other messages from the queue.
 *
 * Failures are logged; nothing is returned to the caller.
 */
static inline void uct_ugni_udt_terminate_thread(uct_ugni_udt_iface_t *iface)
{
    gni_ep_handle_t cancel_ep;
    gni_return_t grc;

    uct_ugni_device_lock(&iface->super.cdm);

    grc = GNI_EpCreate(uct_ugni_udt_iface_nic_handle(iface), iface->super.local_cq, &cancel_ep);
    if (GNI_RC_SUCCESS == grc) {
        grc = GNI_EpBind(cancel_ep, iface->super.cdm.dev->address, iface->super.cdm.domain_id);
        if (GNI_RC_SUCCESS == grc) {
            /* empty datagram whose only payload is the cancel id */
            grc = GNI_EpPostDataWId(cancel_ep, NULL, 0, NULL, 0, UCT_UGNI_UDT_CANCEL);
            if (GNI_RC_SUCCESS != grc) {
                ucs_error("Couldn't send cancel message to UGNI interface! %s %d",
                          gni_err_str[grc], grc);
            }

            /* When the gni_ep is destroyed the above post will be canceled */
            grc = GNI_EpDestroy(cancel_ep);
            uct_ugni_device_unlock(&iface->super.cdm);
            if (GNI_RC_SUCCESS != grc) {
                ucs_error("GNI_EpDestroy failed, Error status: %s %d\n",
                          gni_err_str[grc], grc);
            }
        } else {
            /* bind failed: drop the temporary endpoint before reporting */
            GNI_EpDestroy(cancel_ep);
            uct_ugni_device_unlock(&iface->super.cdm);
            ucs_error("GNI_EpBind failed, Error status: %s %d",
                      gni_err_str[grc], grc);
        }
    } else {
        uct_ugni_device_unlock(&iface->super.cdm);
        ucs_error("GNI_EpCreate, Error status: %s %d",
                  gni_err_str[grc], grc);
    }
}
Beispiel #7
0
/*
 * Constructor for the uGNI UDT interface.
 *
 * After initializing the base uGNI interface it, in order:
 *   1. creates the async event pipe,
 *   2. initializes the descriptor memory pool,
 *   3. creates the wildcard endpoint (ep_any, no CQ attached),
 *   4. takes a first descriptor from the pool and posts the wildcard request,
 *   5. registers the pipe's read fd with the worker's async context,
 *   6. initializes the device mutex/condition and starts the event thread.
 *
 * Returns UCS_OK on success. On any failure the resources acquired so far are
 * released in reverse order and a non-OK status is returned.
 */
static UCS_CLASS_INIT_FUNC(uct_ugni_udt_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
    ucs_status_t status;
    uct_ugni_udt_desc_t *desc;
    gni_return_t ugni_rc;
    int rc;

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params,
                              &uct_ugni_udt_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.udt_seg_size = GNI_DATAGRAM_MAXSIZE;
    self->config.rx_headroom  = params->rx_headroom;
    self->release_desc.cb     = uct_ugni_udt_iface_release_desc;

    status = ucs_async_pipe_create(&self->event_pipe);
    if (UCS_OK != status) {
        ucs_error("Pipe creation failed");
        goto exit;
    }

    status = ucs_mpool_init(&self->free_desc,
                            0,
                            uct_ugni_udt_get_diff(self) + self->config.udt_seg_size * 2,
                            uct_ugni_udt_get_diff(self),
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_udt_desc_mpool_ops,
                            "UGNI-UDT-DESC");

    if (UCS_OK != status) {
        ucs_error("Mpool creation failed");
        goto clean_pipe;
    }

    ugni_rc = GNI_EpCreate(uct_ugni_udt_iface_nic_handle(self), NULL, &self->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_EpCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        status = UCS_ERR_NO_DEVICE;
        goto clean_free_desc;
    }

    /* The descriptor macro jumps straight to clean_ep on failure without
     * touching `status`; pre-set an error so that path does not report
     * UCS_OK. On success `status` is overwritten by the post call below. */
    status = UCS_ERR_NO_RESOURCE;
    UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc,
                             desc, goto clean_ep);
    ucs_debug("First wildcard desc is %p", desc);

    /* Init any desc */
    self->desc_any = desc;
    status = uct_ugni_udt_ep_any_post(self);
    if (UCS_OK != status) {
        /* We can't continue if we can't post the first receive */
        ucs_error("Failed to post wildcard request");
        goto clean_any_desc;
    }

    status = ucs_async_set_event_handler(self->super.super.worker->async->mode,
                                         ucs_async_pipe_rfd(&self->event_pipe),
                                         POLLIN,
                                         uct_ugni_proccess_datagram_pipe,
                                         self, self->super.super.worker->async);

    if (UCS_OK != status) {
        goto clean_cancel_desc;
    }

    pthread_mutex_init(&self->device_lock, NULL);
    pthread_cond_init(&self->device_condition, NULL);
    self->events_ready = 0;

    rc = pthread_create(&self->event_thread, NULL, uct_ugni_udt_device_thread, self);
    if (0 != rc) {
        /* `status` is UCS_OK at this point; make the failure visible. */
        ucs_error("Failed to create event thread, error %d", rc);
        status = UCS_ERR_IO_ERROR;
        goto clean_remove_event;
    }

    return UCS_OK;

 clean_remove_event:
    /* Release the synchronization objects initialized just before the thread
     * was started. The event pipe is destroyed exactly once, at clean_pipe. */
    pthread_cond_destroy(&self->device_condition);
    pthread_mutex_destroy(&self->device_lock);
    /* NOTE(review): the async event handler registered above is not removed
     * on this path before the pipe is destroyed - confirm whether it must be
     * unregistered here. */
 clean_cancel_desc:
    uct_ugni_udt_clean_wildcard(self);
 clean_any_desc:
    ucs_mpool_put(self->desc_any);
 clean_ep:
    ugni_rc = GNI_EpDestroy(self->ep_any);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_warn("GNI_EpDestroy failed, Error status: %s %d",
                 gni_err_str[ugni_rc], ugni_rc);
    }
 clean_free_desc:
    ucs_mpool_cleanup(&self->free_desc, 1);
 clean_pipe:
    ucs_async_pipe_destroy(&self->event_pipe);
 exit:
    uct_ugni_cleanup_base_iface(&self->super);
    ucs_error("Failed to activate interface");
    return status;
}
Beispiel #8
0
/*
 * Helper that brings up the SMSG connection of a virtual channel.
 *
 * Fills a local SMSG attribute block from the VC's mailbox and the domain
 * parameters and then, with the NIC lock held, creates the GNI endpoint on
 * the NIC's TX CQ, binds it to the peer address, initializes SMSG against
 * the peer's attributes and sets the endpoint's event data to
 * (vc->vc_id, peer_id).
 *
 * Returns FI_SUCCESS on success, -FI_EINVAL when the endpoint has no domain,
 * or the fi errno translation of the failing GNI call. If any step after the
 * endpoint creation fails, the endpoint is destroyed before returning.
 */
static int __gnix_vc_smsg_init(struct gnix_vc *vc,
				int peer_id,
				gni_smsg_attr_t *peer_smsg_attr)
{
	int ret = FI_SUCCESS;
	struct gnix_fid_ep *ep;
	struct gnix_fid_domain *dom;
	struct gnix_mbox *mbox = NULL;
	gni_smsg_attr_t local_smsg_attr;
	gni_return_t __attribute__((unused)) status;
	ssize_t __attribute__((unused)) len;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	assert(vc);

	ep = vc->ep;
	assert(ep);

	dom = ep->domain;
	if (dom == NULL)
		return -FI_EINVAL;

	mbox = vc->smsg_mbox;
	assert (mbox);

	/* describe the local mailbox to GNI */
	local_smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	local_smsg_attr.msg_buffer = mbox->base;
	local_smsg_attr.buff_size =  vc->ep->nic->mem_per_mbox;
	local_smsg_attr.mem_hndl = *mbox->memory_handle;
	local_smsg_attr.mbox_offset = (uint64_t)mbox->offset;
	local_smsg_attr.mbox_maxcredit = dom->params.mbox_maxcredit;
	local_smsg_attr.msg_maxsize = dom->params.mbox_msg_maxsize;

	/* everything below runs under the NIC lock */
	fastlock_acquire(&ep->nic->lock);

	status = GNI_EpCreate(ep->nic->gni_nic_hndl, ep->nic->tx_cq,
			      &vc->gni_ep);
	if (status != GNI_RC_SUCCESS) {
		/* nothing to tear down yet */
		GNIX_WARN(FI_LOG_EP_CTRL,
			"GNI_EpCreate returned %s\n", gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto unlock;
	}

	status = GNI_EpBind(vc->gni_ep, vc->peer_addr.device_addr,
			    vc->peer_addr.cdm_id);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_EpBind returned %s\n", gni_err_str[status]);
		goto destroy_ep;
	}

	status = GNI_SmsgInit(vc->gni_ep, &local_smsg_attr, peer_smsg_attr);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			"GNI_SmsgInit returned %s\n", gni_err_str[status]);
		goto destroy_ep;
	}

	status = GNI_EpSetEventData(vc->gni_ep, vc->vc_id, peer_id);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_EpSetEventData returned %s\n",
			   gni_err_str[status]);
		goto destroy_ep;
	}

	/* connection is up */
	fastlock_release(&ep->nic->lock);
	return ret;

destroy_ep:
	/* shared failure path for every step after the endpoint exists */
	ret = gnixu_to_fi_errno(status);
	GNI_EpDestroy(vc->gni_ep);
unlock:
	fastlock_release(&ep->nic->lock);
	return ret;
}