/**
 * Create a uGNI communication domain (CDM) and attach it to a NIC device.
 *
 * The domain id is computed as
 *     pmi_rank_id + pmi_num_of_ranks * ugni_domain_global_counter
 * and the global counter is advanced only after both the create and the
 * attach have succeeded.
 *
 * @param [in]  device_index  Index into job_info.devices selecting the device
 *                            whose device_id is passed to GNI_CdmAttach.
 * @param [out] domain_id     Receives the computed domain id.
 * @param [out] cdm_handle    Receives the handle produced by GNI_CdmCreate.
 *                            It is destroyed again if the attach fails, so it
 *                            must not be used when an error is returned.
 * @param [out] nic_handle    Receives the NIC handle produced by GNI_CdmAttach.
 * @param [out] address       Receives the address reported by GNI_CdmAttach.
 *
 * @return UCS_OK on success, the status of uct_ugni_fetch_pmi() if that call
 *         fails, or UCS_ERR_NO_DEVICE if the domain cannot be created or
 *         attached.
 */
ucs_status_t uct_ugni_init_nic(int device_index, int *domain_id,
                               gni_cdm_handle_t *cdm_handle,
                               gni_nic_handle_t *nic_handle,
                               uint32_t *address)
{
    int modes;
    ucs_status_t rc;
    gni_return_t ugni_rc = GNI_RC_SUCCESS;
    gni_return_t destroy_rc;

    rc = uct_ugni_fetch_pmi();
    if (UCS_OK != rc) {
        ucs_error("Failed to activate context, Error status: %d", rc);
        return rc;
    }

    *domain_id = job_info.pmi_rank_id +
                 job_info.pmi_num_of_ranks * ugni_domain_global_counter;
    modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED |
            GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL;
    ucs_debug("Creating new PD domain with id %d (%d + %d * %d)",
              *domain_id, job_info.pmi_rank_id,
              job_info.pmi_num_of_ranks, ugni_domain_global_counter);

    ugni_rc = GNI_CdmCreate(*domain_id, job_info.ptag, job_info.cookie,
                            modes, cdm_handle);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_CdmCreate failed, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        return UCS_ERR_NO_DEVICE;
    }

    /* Attach the new domain to the device selected by device_index */
    ugni_rc = GNI_CdmAttach(*cdm_handle,
                            job_info.devices[device_index].device_id,
                            address, nic_handle);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_CdmAttach failed (domain id %d, %d), Error status: %s %d",
                  *domain_id, ugni_domain_global_counter,
                  gni_err_str[ugni_rc], ugni_rc);

        /* The caller only sees an error, so release the domain created
         * above instead of leaking it. */
        destroy_rc = GNI_CdmDestroy(*cdm_handle);
        if (GNI_RC_SUCCESS != destroy_rc) {
            ucs_error("GNI_CdmDestroy failed, Error status: %s %d",
                      gni_err_str[destroy_rc], destroy_rc);
        }
        return UCS_ERR_NO_DEVICE;
    }

    /* Only consume a counter value once the domain is fully set up */
    ++ugni_domain_global_counter;
    return UCS_OK;
}
int opal_common_ugni_init (void) { int modes, rc, i; uint32_t my_cdm_id; opal_common_ugni_module_ref_count ++; if (opal_common_ugni_module_ref_count > 1) { return OPAL_SUCCESS; } /* use pid for my_cdm_id. Although its not stated in the uGNI documentation, the cdm_id only needs to be unique within a node for a given ptag/cookie tuple */ my_cdm_id = getpid(); /*TODO: eventually need something else for thread-hot support */ /* pull settings from ugni btl */ opal_common_ugni_module.rdma_max_retries = mca_btl_ugni_component.rdma_max_retries; /* Create a communication domain */ modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED | GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL; /* collect uGNI information */ rc = get_ptag(&opal_common_ugni_module.ptag); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { return rc; } rc = get_cookie(&opal_common_ugni_module.cookie); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { return rc; } /* create a communication domain */ rc = GNI_CdmCreate (my_cdm_id, opal_common_ugni_module.ptag, opal_common_ugni_module.cookie, modes, &opal_common_ugni_module.cd_handle); if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) { OPAL_OUTPUT((0, "Error: Creating communication domain %d\n",rc)); return opal_common_rc_ugni_to_opal (rc); } /* setup uGNI devices. we only support one device atm */ opal_common_ugni_module.device_count = 1; opal_common_ugni_module.devices = calloc (opal_common_ugni_module.device_count, sizeof (opal_common_ugni_device_t)); for (i = 0 ; i < opal_common_ugni_module.device_count ; ++i) { rc = opal_common_ugni_device_init (opal_common_ugni_module.devices + i, i); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { OPAL_OUTPUT((-1, "error initializing uGNI device")); return rc; } } /* send ugni modex */ opal_common_ugni_send_modex (my_cdm_id); return OPAL_SUCCESS; }
int _gnix_cm_nic_alloc(struct gnix_fid_domain *domain, struct gnix_cm_nic **cm_nic_ptr) { int ret = FI_SUCCESS; struct gnix_cm_nic *cm_nic = NULL; uint32_t device_addr, cdm_id; gni_return_t status; GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); *cm_nic_ptr = NULL; cm_nic = (struct gnix_cm_nic *)calloc(1, sizeof(*cm_nic)); if (cm_nic == NULL) { ret = -FI_ENOMEM; goto err; } ret = _gnix_get_new_cdm_id(domain, &cdm_id); if (ret != FI_SUCCESS) goto err; GNIX_INFO(FI_LOG_EP_CTRL, "creating cm_nic for %u/0x%x/%u\n", domain->ptag, domain->cookie, cdm_id); status = GNI_CdmCreate(cdm_id, domain->ptag, domain->cookie, gnix_cdm_modes, &cm_nic->gni_cdm_hndl); if (status != GNI_RC_SUCCESS) { GNIX_ERR(FI_LOG_EP_CTRL, "GNI_CdmCreate returned %s\n", gni_err_str[status]); ret = gnixu_to_fi_errno(status); goto err; } /* * Okay, now go for the attach */ status = GNI_CdmAttach(cm_nic->gni_cdm_hndl, 0, &device_addr, &cm_nic->gni_nic_hndl); if (status != GNI_RC_SUCCESS) { GNIX_ERR(FI_LOG_EP_CTRL, "GNI_CdmAttach returned %s\n", gni_err_str[status]); ret = gnixu_to_fi_errno(status); goto err; } cm_nic->cdm_id = cdm_id; cm_nic->ptag = domain->ptag; cm_nic->cookie = domain->cookie; cm_nic->device_addr = device_addr; cm_nic->control_progress = domain->control_progress; fastlock_init(&cm_nic->lock); fastlock_init(&cm_nic->wq_lock); list_head_init(&cm_nic->cm_nic_wq); /* * prep the cm nic's dgram component */ ret = _gnix_dgram_hndl_alloc(domain->fabric, cm_nic, domain->control_progress, &cm_nic->dgram_hndl); if (ret != FI_SUCCESS) goto err; *cm_nic_ptr = cm_nic; return ret; err: if (cm_nic->dgram_hndl) _gnix_dgram_hndl_free(cm_nic->dgram_hndl); if (cm_nic->gni_cdm_hndl) GNI_CdmDestroy(cm_nic->gni_cdm_hndl); if (cm_nic != NULL) free(cm_nic); return ret; }