示例#1
0
/**
 * Create a uGNI communication domain (CDM) and attach it to a NIC device.
 *
 * The domain id is derived from the PMI rank, the number of PMI ranks and
 * the global domain counter, so each successful call yields a new id.
 *
 * @param device_index  Index into job_info.devices of the device to attach.
 * @param domain_id     [out] Domain id used to create the CDM.
 * @param cdm_handle    [out] Handle of the created communication domain.
 * @param nic_handle    [out] Handle of the attached NIC.
 * @param address       [out] Address filled in by GNI_CdmAttach.
 *
 * @return UCS_OK on success; the status of uct_ugni_fetch_pmi() if it fails;
 *         UCS_ERR_NO_DEVICE if the CDM cannot be created or attached.
 */
ucs_status_t uct_ugni_init_nic(int device_index,
                               int *domain_id,
                               gni_cdm_handle_t *cdm_handle,
                               gni_nic_handle_t *nic_handle,
                               uint32_t *address)
{
    int modes;
    ucs_status_t rc;
    gni_return_t ugni_rc = GNI_RC_SUCCESS;
    gni_return_t destroy_rc;

    rc = uct_ugni_fetch_pmi();
    if (UCS_OK != rc) {
        ucs_error("Failed to activate context, Error status: %d", rc);
        return rc;
    }

    *domain_id = job_info.pmi_rank_id + job_info.pmi_num_of_ranks * ugni_domain_global_counter;
    modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED |
        GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL;
    ucs_debug("Creating new PD domain with id %d (%d + %d * %d)",
              *domain_id, job_info.pmi_rank_id,
              job_info.pmi_num_of_ranks, ugni_domain_global_counter);
    ugni_rc = GNI_CdmCreate(*domain_id, job_info.ptag, job_info.cookie,
                            modes, cdm_handle);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_CdmCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        return UCS_ERR_NO_DEVICE;
    }

    /* Attach the domain to the device selected by device_index */
    ugni_rc = GNI_CdmAttach(*cdm_handle, job_info.devices[device_index].device_id,
                            address, nic_handle);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_CdmAttach failed (domain id %d, %d), Error status: %s %d",
                  *domain_id, ugni_domain_global_counter, gni_err_str[ugni_rc], ugni_rc);

        /* The domain was created above; release it so a failed attach does
         * not leak the handle. The counter is left untouched, so the same
         * domain id is computed again on the next attempt. */
        destroy_rc = GNI_CdmDestroy(*cdm_handle);
        if (GNI_RC_SUCCESS != destroy_rc) {
            ucs_error("GNI_CdmDestroy failed, Error status: %s %d",
                      gni_err_str[destroy_rc], destroy_rc);
        }
        return UCS_ERR_NO_DEVICE;
    }

    /* Only a fully initialized domain consumes a counter value */
    ++ugni_domain_global_counter;
    return UCS_OK;
}
示例#2
0
文件: common_ugni.c 项目: AT95/ompi
int opal_common_ugni_init (void)
{
    int modes, rc, i;
    uint32_t my_cdm_id;

    opal_common_ugni_module_ref_count ++;

    if (opal_common_ugni_module_ref_count > 1) {
        return OPAL_SUCCESS;
    }

    /* use pid for my_cdm_id.  Although its not stated in the uGNI
       documentation, the cdm_id only needs to be unique
       within a node for a given ptag/cookie tuple */

    my_cdm_id = getpid();   /*TODO: eventually need something else for thread-hot support */

    /* pull settings from ugni btl */
    opal_common_ugni_module.rdma_max_retries =
        mca_btl_ugni_component.rdma_max_retries;

    /* Create a communication domain */

    modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED |
            GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL;

    /* collect uGNI information */
    rc = get_ptag(&opal_common_ugni_module.ptag);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    rc = get_cookie(&opal_common_ugni_module.cookie);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    /* create a communication domain */
    rc = GNI_CdmCreate (my_cdm_id, opal_common_ugni_module.ptag,
                        opal_common_ugni_module.cookie, modes,
                        &opal_common_ugni_module.cd_handle);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        OPAL_OUTPUT((0, "Error: Creating communication domain %d\n",rc));
        return opal_common_rc_ugni_to_opal (rc);
    }

    /* setup uGNI devices. we only support one device atm */
    opal_common_ugni_module.device_count = 1;
    opal_common_ugni_module.devices = calloc (opal_common_ugni_module.device_count,
                                              sizeof (opal_common_ugni_device_t));

    for (i = 0 ; i < opal_common_ugni_module.device_count ; ++i) {
        rc = opal_common_ugni_device_init (opal_common_ugni_module.devices + i, i);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            OPAL_OUTPUT((-1, "error initializing uGNI device"));
            return rc;
        }
    }

    /* send ugni modex */
    opal_common_ugni_send_modex (my_cdm_id);

    return OPAL_SUCCESS;
}
示例#3
0
int _gnix_cm_nic_alloc(struct gnix_fid_domain *domain,
			struct gnix_cm_nic **cm_nic_ptr)
{
	int ret = FI_SUCCESS;
	struct gnix_cm_nic *cm_nic = NULL;
	uint32_t device_addr, cdm_id;
	gni_return_t status;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	*cm_nic_ptr = NULL;

	cm_nic = (struct gnix_cm_nic *)calloc(1, sizeof(*cm_nic));
	if (cm_nic == NULL) {
		ret = -FI_ENOMEM;
		goto err;
	}

	ret = _gnix_get_new_cdm_id(domain, &cdm_id);
	if (ret != FI_SUCCESS)
		goto err;

	GNIX_INFO(FI_LOG_EP_CTRL, "creating cm_nic for %u/0x%x/%u\n",
		      domain->ptag, domain->cookie, cdm_id);

	status = GNI_CdmCreate(cdm_id,
			       domain->ptag,
			       domain->cookie,
			       gnix_cdm_modes,
			       &cm_nic->gni_cdm_hndl);
	if (status != GNI_RC_SUCCESS) {
		GNIX_ERR(FI_LOG_EP_CTRL, "GNI_CdmCreate returned %s\n",
			       gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err;
	}

	/*
	 * Okay, now go for the attach
	 */
	status = GNI_CdmAttach(cm_nic->gni_cdm_hndl, 0, &device_addr,
			       &cm_nic->gni_nic_hndl);
	if (status != GNI_RC_SUCCESS) {
		GNIX_ERR(FI_LOG_EP_CTRL, "GNI_CdmAttach returned %s\n",
		       gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err;
	}

	cm_nic->cdm_id = cdm_id;
	cm_nic->ptag = domain->ptag;
	cm_nic->cookie = domain->cookie;
	cm_nic->device_addr = device_addr;
	cm_nic->control_progress = domain->control_progress;
	fastlock_init(&cm_nic->lock);
	fastlock_init(&cm_nic->wq_lock);
	list_head_init(&cm_nic->cm_nic_wq);

	/*
	 * prep the cm nic's dgram component
	 */
	ret = _gnix_dgram_hndl_alloc(domain->fabric,
				     cm_nic,
				     domain->control_progress,
				     &cm_nic->dgram_hndl);
	if (ret != FI_SUCCESS)
		goto err;

	*cm_nic_ptr = cm_nic;
	return ret;

err:
	if (cm_nic->dgram_hndl)
		_gnix_dgram_hndl_free(cm_nic->dgram_hndl);

	if (cm_nic->gni_cdm_hndl)
		GNI_CdmDestroy(cm_nic->gni_cdm_hndl);

	if (cm_nic != NULL)
		free(cm_nic);

	return ret;
}