/*
 * Timeout-driven progress hook: the opaque argument is the cm_nic to
 * progress.  Any failure from the progress call is logged and otherwise
 * ignored (this callback has no way to report errors).
 */
static void __gnix_cm_nic_timeout_progress(void *data)
{
	struct gnix_cm_nic *nic = data;
	int rc;

	rc = _gnix_cm_nic_progress(nic);
	if (rc != FI_SUCCESS)
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gnix_cm_nic_progress returned %s\n",
			  fi_strerror(-rc));
}
Test(dg_allocation, dgram_wc_post_exchg) { int ret = 0; struct gnix_cm_nic *cm_nic; struct gnix_datagram *dgram_wc, *dgram_bnd; ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); cm_nic = ep_priv->cm_nic; cr_assert((cm_nic != NULL), "cm_nic NULL"); cr_assert((cm_nic->dgram_hndl != NULL), "cm_nic dgram_hndl NULL"); ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_WC, &dgram_wc); cr_assert(!ret, "_gnix_dgram_alloc wc"); dgram_wc->callback_fn = dgram_callback_fn; ret = _gnix_dgram_wc_post(dgram_wc); cr_assert((ret == 0), "_gnix_dgram_alloc wc"); ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_BND, &dgram_bnd); cr_assert((ret == 0), "_gnix_dgram_alloc bnd"); dgram_bnd->target_addr = cm_nic->my_name.gnix_addr; local_address = cm_nic->my_name.gnix_addr; dgram_bnd->callback_fn = dgram_callback_fn; ret = _gnix_dgram_bnd_post(dgram_bnd); cr_assert(ret == 0); /* * progress auto, don't need to do anything */ while (dgram_match != 1) { ret = _gnix_cm_nic_progress(cm_nic); cr_assert(ret == 0); pthread_yield(); } ret = _gnix_dgram_free(dgram_bnd); cr_assert(!ret, "_gnix_dgram_free bnd"); ret = _gnix_dgram_free(dgram_wc); cr_assert(!ret, "_gnix_dgram_free wc"); }
/*
 * Return 0 if the VC is connected.  Otherwise progress the VC's CM NIC
 * (logging unexpected failures, -FI_EAGAIN excepted) and return
 * -FI_EAGAIN so the caller retries later.
 */
static int __gnix_vc_connected(struct gnix_vc *vc)
{
	int rc;

	/* fast path: already connected */
	if (likely(vc->conn_state >= GNIX_VC_CONNECTED))
		return 0;

	rc = _gnix_cm_nic_progress(vc->ep->cm_nic);
	if ((rc != FI_SUCCESS) && (rc != -FI_EAGAIN))
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gnix_cm_nic_progress() failed: %s\n",
			  fi_strerror(-rc));

	/* waiting to connect, check back later */
	return -FI_EAGAIN;
}
static int __gnix_vc_hndl_conn_req(struct gnix_cm_nic *cm_nic, char *msg_buffer, struct gnix_address src_cm_nic_addr) { int ret = FI_SUCCESS; gni_return_t __attribute__((unused)) status; struct gnix_fid_ep *ep = NULL; gnix_ht_key_t *key_ptr; struct gnix_av_addr_entry entry; struct gnix_address src_addr, target_addr; struct gnix_vc *vc = NULL; struct gnix_vc *vc_try = NULL; struct gnix_work_req *work_req; int src_vc_id; gni_smsg_attr_t src_smsg_attr; uint64_t src_vc_ptr; struct wq_hndl_conn_req *data = NULL; ssize_t __attribute__((unused)) len; GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); /* * unpack the message */ __gnix_vc_unpack_conn_req(msg_buffer, &target_addr, &src_addr, &src_vc_id, &src_vc_ptr, &src_smsg_attr); GNIX_DEBUG(FI_LOG_EP_CTRL, "conn req rx: (From Aries addr 0x%x Id %d to Aries 0x%x Id %d src vc 0x%lx )\n", src_addr.device_addr, src_addr.cdm_id, target_addr.device_addr, target_addr.cdm_id, src_vc_ptr); /* * lookup the ep from the addr_to_ep_ht using the target_addr * in the datagram */ key_ptr = (gnix_ht_key_t *)&target_addr; ep = (struct gnix_fid_ep *)_gnix_ht_lookup(cm_nic->addr_to_ep_ht, *key_ptr); if (ep == NULL) { GNIX_WARN(FI_LOG_EP_DATA, "_gnix_ht_lookup addr_to_ep failed\n"); ret = -FI_ENOENT; goto err; } /* * look to see if there is a VC already for the * address of the connecting EP. */ key_ptr = (gnix_ht_key_t *)&src_addr; fastlock_acquire(&ep->vc_ht_lock); vc = (struct gnix_vc *)_gnix_ht_lookup(ep->vc_ht, *key_ptr); /* * if there is no corresponding vc in the hash, * or there is an entry and its not in connecting state * go down the conn req ack route. 
*/ if ((vc == NULL) || (vc->conn_state == GNIX_VC_CONN_NONE)) { if (vc == NULL) { entry.gnix_addr = src_addr; entry.cm_nic_cdm_id = src_cm_nic_addr.cdm_id; ret = _gnix_vc_alloc(ep, &entry, &vc_try); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "_gnix_vc_alloc returned %s\n", fi_strerror(-ret)); goto err; } vc_try->conn_state = GNIX_VC_CONNECTING; ret = _gnix_ht_insert(ep->vc_ht, *key_ptr, vc_try); if (likely(ret == FI_SUCCESS)) { vc = vc_try; vc->modes |= GNIX_VC_MODE_IN_HT; } else if (ret == -FI_ENOSPC) { _gnix_vc_destroy(vc_try); } else { GNIX_WARN(FI_LOG_EP_DATA, "_gnix_ht_insert returned %s\n", fi_strerror(-ret)); goto err; } } else vc->conn_state = GNIX_VC_CONNECTING; /* * prepare a work request to * initiate an request response */ work_req = calloc(1, sizeof(*work_req)); if (work_req == NULL) { ret = -FI_ENOMEM; goto err; } data = calloc(1, sizeof(struct wq_hndl_conn_req)); if (data == NULL) { ret = -FI_ENOMEM; goto err; } memcpy(&data->src_smsg_attr, &src_smsg_attr, sizeof(src_smsg_attr)); data->vc = vc; data->src_vc_id = src_vc_id; data->src_vc_ptr = src_vc_ptr; work_req->progress_fn = __gnix_vc_conn_ack_prog_fn; work_req->data = data; work_req->completer_fn = __gnix_vc_conn_ack_comp_fn; work_req->completer_data = data; /* * add the work request to the tail of the * cm_nic's work queue, progress the cm_nic. */ fastlock_acquire(&cm_nic->wq_lock); dlist_insert_before(&work_req->list, &cm_nic->cm_nic_wq); fastlock_release(&cm_nic->wq_lock); fastlock_release(&ep->vc_ht_lock); _gnix_vc_schedule(vc); ret = _gnix_cm_nic_progress(cm_nic); if (ret != FI_SUCCESS) GNIX_WARN(FI_LOG_EP_CTRL, "_gnix_cm_nic_progress returned %s\n", fi_strerror(-ret)); } else { /* * we can only be in connecting state if we * reach here. We have all the informatinon, * and the other side will get the information * at some point, so go ahead and build SMSG connection. 
*/ if (vc->conn_state != GNIX_VC_CONNECTING) { GNIX_WARN(FI_LOG_EP_CTRL, "vc %p not in connecting state nor in cm wq\n", vc, vc->conn_state); ret = -FI_EINVAL; goto err; } ret = __gnix_vc_smsg_init(vc, src_vc_id, &src_smsg_attr); if (ret != FI_SUCCESS) { GNIX_WARN(FI_LOG_EP_CTRL, "_gnix_vc_smsg_init returned %s\n", fi_strerror(-ret)); goto err; } vc->conn_state = GNIX_VC_CONNECTED; GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n", vc); fastlock_release(&ep->vc_ht_lock); ret = _gnix_vc_schedule(vc); ret = _gnix_cm_nic_progress(cm_nic); if (ret != FI_SUCCESS) GNIX_WARN(FI_LOG_EP_CTRL, "_gnix_cm_nic_progress returned %s\n", fi_strerror(-ret)); } err: return ret; }
/*
 * Datagram completion callback for the CM NIC.
 *
 * Validates the completed datagram's post state and in/out tags, delivers
 * any payload from a bound (BND) exchange to the cm_nic's receive
 * callback, then either reposts the datagram (wildcard/WC) or returns it
 * to the free list (bound).
 *
 * Returns FI_SUCCESS or a negative fi_errno value.
 */
static int __process_datagram(struct gnix_datagram *dgram,
			      struct gnix_address peer_address,
			      gni_post_state_t state)
{
	int ret = FI_SUCCESS;
	struct gnix_cm_nic *cm_nic = NULL;
	uint8_t in_tag = 0, out_tag = 0;
	char rcv_buf[GNIX_CM_NIC_MAX_MSG_SIZE];

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/* The owning cm_nic is stashed in the dgram's cache field when the
	 * datagram is posted. */
	cm_nic = (struct gnix_cm_nic *)dgram->cache;
	if (cm_nic == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "process_datagram, null cache\n");
		/* NOTE(review): ret is still FI_SUCCESS here, so this error
		 * is reported only via the log — confirm intentional. */
		goto err;
	}

	/* Anything other than a completed post is handed to the error path
	 * helper; its return value becomes ours. */
	if (state != GNI_POST_COMPLETED) {
		ret = __process_dgram_w_error(cm_nic, dgram, peer_address,
					      state);
		GNIX_WARN(FI_LOG_EP_CTRL, "process_datagram bad post state %d\n",
			  state);
		goto err;
	}

	/* Both tags must be one of the two known kinds: bound or wildcard. */
	__dgram_get_in_tag(dgram, &in_tag);
	if ((in_tag != GNIX_CM_NIC_BND_TAG) && (in_tag != GNIX_CM_NIC_WC_TAG)) {
		GNIX_WARN(FI_LOG_EP_CTRL, "datagram with unknown in tag %d\n",
			  in_tag);
		goto err;
	}

	__dgram_unpack_out_tag(dgram, &out_tag);
	if ((out_tag != GNIX_CM_NIC_BND_TAG) &&
	    (out_tag != GNIX_CM_NIC_WC_TAG)) {
		GNIX_WARN(FI_LOG_EP_CTRL, "datagram with unknown out tag %d\n",
			  out_tag);
		goto err;
	}

	/*
	 * if out buf actually has data, call consumer's
	 * receive callback
	 */
	if (out_tag == GNIX_CM_NIC_BND_TAG) {
		_gnix_dgram_unpack_buf(dgram, GNIX_DGRAM_OUT_BUF, rcv_buf,
				       GNIX_CM_NIC_MAX_MSG_SIZE);
		ret = cm_nic->rcv_cb_fn(cm_nic, rcv_buf, peer_address);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "cm_nic->rcv_cb_fn returned %s\n",
				  fi_strerror(-ret));
			goto err;
		}
		/* drive any work the receive callback queued */
		ret = _gnix_cm_nic_progress(cm_nic);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_cm_nic_progress returned %s\n",
				  fi_strerror(-ret));
	}

	/*
	 * if we are processing a WC datagram, repost, otherwise
	 * just put back on the freelist.
	 */
	if (in_tag == GNIX_CM_NIC_WC_TAG) {
		/* re-arm the wildcard: same callback, same owner */
		dgram->callback_fn = __process_datagram;
		dgram->cache = cm_nic;
		__dgram_set_tag(dgram, in_tag);
		ret = _gnix_dgram_wc_post(dgram);
		if (ret != FI_SUCCESS) {
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_dgram_wc_post returned %s\n",
				  fi_strerror(-ret));
			goto err;
		}
	} else {
		ret = _gnix_dgram_free(dgram);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_dgram_free returned %s\n",
				  fi_strerror(-ret));
	}

	return ret;

err:
	/* Only bound datagrams are reclaimed on error; a WC datagram that
	 * fails here is neither reposted nor freed.
	 * NOTE(review): looks like a potential WC datagram leak on the error
	 * path — confirm against the dgram handle's cleanup logic. */
	if (in_tag == GNIX_CM_NIC_BND_TAG)
		_gnix_dgram_free(dgram);
	return ret;
}
ssize_t _gnix_atomic(struct gnix_fid_ep *ep, enum gnix_fab_req_type fr_type, const struct fi_msg_atomic *msg, const struct fi_ioc *comparev, void **compare_desc, size_t compare_count, struct fi_ioc *resultv, void **result_desc, size_t result_count, uint64_t flags) { struct gnix_vc *vc; struct gnix_fab_req *req; struct gnix_fid_mem_desc *md = NULL; int rc, len; struct fid_mr *auto_mr = NULL; void *mdesc = NULL; uint64_t compare_operand = 0; void *loc_addr = NULL; int dt_len, dt_align; int connected; if (!(flags & FI_INJECT) && !ep->send_cq && (((fr_type == GNIX_FAB_RQ_AMO || fr_type == GNIX_FAB_RQ_NAMO_AX || fr_type == GNIX_FAB_RQ_NAMO_AX_S) && !ep->write_cntr) || ((fr_type == GNIX_FAB_RQ_FAMO || fr_type == GNIX_FAB_RQ_CAMO || fr_type == GNIX_FAB_RQ_NAMO_FAX || fr_type == GNIX_FAB_RQ_NAMO_FAX_S) && !ep->read_cntr))) { return -FI_ENOCQ; } if (!ep || !msg || !msg->msg_iov || msg->msg_iov[0].count != 1 || msg->iov_count != GNIX_MAX_ATOMIC_IOV_LIMIT || !msg->rma_iov) return -FI_EINVAL; /* * see fi_atomic man page */ if ((msg->op != FI_ATOMIC_READ) && !msg->msg_iov[0].addr) return -FI_EINVAL; if (flags & FI_TRIGGER) { struct fi_triggered_context *trigger_context = (struct fi_triggered_context *)msg->context; if ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) || (flags & FI_INJECT)) { return -FI_EINVAL; } } if (fr_type == GNIX_FAB_RQ_CAMO) { if (!comparev || !comparev[0].addr || compare_count != 1) return -FI_EINVAL; compare_operand = *(uint64_t *)comparev[0].addr; } dt_len = ofi_datatype_size(msg->datatype); dt_align = dt_len - 1; len = dt_len * msg->msg_iov->count; if (msg->rma_iov->addr & dt_align) { GNIX_INFO(FI_LOG_EP_DATA, "Invalid target alignment: %d (mask 0x%x)\n", msg->rma_iov->addr, dt_align); return -FI_EINVAL; } /* need a memory descriptor for all fetching and comparison AMOs */ if (fr_type == GNIX_FAB_RQ_FAMO || fr_type == GNIX_FAB_RQ_CAMO || fr_type == GNIX_FAB_RQ_NAMO_FAX || fr_type == GNIX_FAB_RQ_NAMO_FAX_S) { if (!resultv || !resultv[0].addr || 
result_count != 1) return -FI_EINVAL; loc_addr = resultv[0].addr; if ((uint64_t)loc_addr & dt_align) { GNIX_INFO(FI_LOG_EP_DATA, "Invalid source alignment: %d (mask 0x%x)\n", loc_addr, dt_align); return -FI_EINVAL; } if (!result_desc || !result_desc[0]) { rc = _gnix_mr_reg(&ep->domain->domain_fid.fid, loc_addr, len, FI_READ | FI_WRITE, 0, 0, 0, &auto_mr, NULL, ep->auth_key, GNIX_PROV_REG); if (rc != FI_SUCCESS) { GNIX_INFO(FI_LOG_EP_DATA, "Failed to auto-register local buffer: %d\n", rc); return rc; } flags |= FI_LOCAL_MR; mdesc = (void *)auto_mr; GNIX_INFO(FI_LOG_EP_DATA, "auto-reg MR: %p\n", auto_mr); } else { mdesc = result_desc[0]; } } /* setup fabric request */ req = _gnix_fr_alloc(ep); if (!req) { GNIX_INFO(FI_LOG_EP_DATA, "_gnix_fr_alloc() failed\n"); rc = -FI_ENOSPC; goto err_fr_alloc; } req->type = fr_type; req->gnix_ep = ep; req->user_context = msg->context; req->work_fn = _gnix_amo_post_req; if (mdesc) { md = container_of(mdesc, struct gnix_fid_mem_desc, mr_fid); } req->amo.loc_md = (void *)md; req->amo.loc_addr = (uint64_t)loc_addr; if ((fr_type == GNIX_FAB_RQ_NAMO_AX) || (fr_type == GNIX_FAB_RQ_NAMO_FAX) || (fr_type == GNIX_FAB_RQ_NAMO_AX_S) || (fr_type == GNIX_FAB_RQ_NAMO_FAX_S)) { req->amo.first_operand = *(uint64_t *)msg->msg_iov[0].addr; req->amo.second_operand = *((uint64_t *)(msg->msg_iov[0].addr) + 1); } else if (msg->op == FI_ATOMIC_READ) { req->amo.first_operand = 0xFFFFFFFFFFFFFFFF; /* operand to FAND */ } else if (msg->op == FI_CSWAP) { req->amo.first_operand = compare_operand; req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr; } else if (msg->op == FI_MSWAP) { req->amo.first_operand = ~compare_operand; req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr; req->amo.second_operand &= compare_operand; } else { req->amo.first_operand = *(uint64_t *)msg->msg_iov[0].addr; } req->amo.rem_addr = msg->rma_iov->addr; req->amo.rem_mr_key = msg->rma_iov->key; req->amo.len = len; req->amo.imm = msg->data; req->amo.datatype = 
msg->datatype; req->amo.op = msg->op; req->flags = flags; /* Inject interfaces always suppress completions. If * SELECTIVE_COMPLETION is set, honor any setting. Otherwise, always * deliver a completion. */ if ((flags & GNIX_SUPPRESS_COMPLETION) || (ep->send_selective_completion && !(flags & FI_COMPLETION))) { req->flags &= ~FI_COMPLETION; } else { req->flags |= FI_COMPLETION; } COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); /* find VC for target */ rc = _gnix_vc_ep_get_vc(ep, msg->addr, &vc); if (rc) { GNIX_INFO(FI_LOG_EP_DATA, "_gnix_vc_ep_get_vc() failed, addr: %lx, rc:\n", msg->addr, rc); goto err_get_vc; } req->vc = vc; rc = _gnix_vc_queue_tx_req(req); connected = (vc->conn_state == GNIX_VC_CONNECTED); COND_RELEASE(ep->requires_lock, &ep->vc_lock); /* *If a new VC was allocated, progress CM before returning. * If the VC is connected and there's a backlog, poke * the nic progress engine befure returning. */ if (!connected) { _gnix_cm_nic_progress(ep->cm_nic); } else if (!dlist_empty(&vc->tx_queue)) { _gnix_nic_progress(vc->ep->nic); } return rc; err_get_vc: COND_RELEASE(ep->requires_lock, &ep->vc_lock); err_fr_alloc: if (auto_mr) { fi_close(&auto_mr->fid); } return rc; }