/*
 * This function is intended to be invoked as the start routine
 * argument to pthread_create().
 */
static void *__gnix_nic_prog_thread_fn(void *the_arg)
{
	int ret = FI_SUCCESS, prev_state;
	int retry = 0;
	uint32_t which;

	struct gnix_nic *nic = (struct gnix_nic *)the_arg;
	sigset_t sigmask;
	gni_cq_handle_t cqv[2];
	gni_return_t status;
	gni_cq_entry_t cqe;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/*
	 * temporarily disable cancelability while we set up
	 * some stuff
	 */

	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &prev_state);

	/*
	 * help out Cray core-spec, say we're not an app thread
	 * and can be run on core-spec cpus.
	 */

	ret = _gnix_task_is_not_app();
	if (ret)
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gnix_task_is_not_app call returned %d\n",
			  ret);

	/*
	 * block all signals, don't want this thread to catch
	 * signals that may be for app threads
	 */

	memset(&sigmask, 0, sizeof(sigset_t));
	ret = sigfillset(&sigmask);
	if (ret) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "sigfillset call returned %d\n", ret);
	} else {
		ret = pthread_sigmask(SIG_SETMASK,
				      &sigmask, NULL);
		if (ret)
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "pthread_sigmask call returned %d\n", ret);
	}

	/*
	 * okay now we're ready to be cancelable.
	 */

	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &prev_state);

	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	cqv[0] = nic->tx_cq_blk;
	cqv[1] = nic->rx_cq_blk;

try_again:
	status = GNI_CqVectorMonitor(cqv, 2, -1, &which);

	switch (status) {
	case GNI_RC_SUCCESS:

		/*
		 * first dequeue RX CQEs
		 */
		if (which == 1) {
			do {
				status = GNI_CqGetEvent(nic->rx_cq_blk,
							&cqe);
			} while (status == GNI_RC_SUCCESS);
		}
		_gnix_nic_progress(nic);
		retry = 1;
		break;
	case GNI_RC_TIMEOUT:
	case GNI_RC_NOT_DONE:
		retry = 1;
		break;
	case GNI_RC_INVALID_PARAM:
	case GNI_RC_INVALID_STATE:
	case GNI_RC_ERROR_RESOURCE:
	case GNI_RC_ERROR_NOMEM:
		retry = 0;
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_CqVectorMonitor returned %s\n",
			  gni_err_str[status]);
		break;
	default:
		retry = 0;
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_CqVectorMonitor returned unexpected code %s\n",
			  gni_err_str[status]);
		break;
	}

	if (retry)
		goto try_again;

	return NULL;
}
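/*
 * Illustrative only: a minimal sketch of how the progress thread above
 * might be spawned. The field name nic->progress_thread and the helper
 * __gnix_nic_start_progress_thread() are assumptions for illustration,
 * not taken from the surrounding source.
 */
static int __gnix_nic_start_progress_thread(struct gnix_nic *nic)
{
	int ret;

	/* the nic is handed to the thread as its start-routine argument */
	ret = pthread_create(&nic->progress_thread, NULL,
			     __gnix_nic_prog_thread_fn, (void *)nic);
	if (ret)
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "pthread_create call returned %d\n", ret);

	return ret;
}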
/*
 * Destroy an unconnected VC. More support is needed to shutdown
 * and destroy an active VC.
 */
int _gnix_vc_destroy(struct gnix_vc *vc)
{
	int ret = FI_SUCCESS;
	struct gnix_nic *nic = NULL;
	gni_return_t status = GNI_RC_NOT_DONE;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if (vc->ep == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "ep null\n");
		return -FI_EINVAL;
	}

	nic = vc->ep->nic;
	if (nic == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "ep nic null for vc %p\n", vc);
		return -FI_EINVAL;
	}

	/*
	 * move vc state to terminating
	 */

	vc->conn_state = GNIX_VC_CONN_TERMINATING;

	/*
	 * try to unbind the gni_ep if non-NULL.
	 * If there are SMSG or PostFMA/RDMA outstanding
	 * wait here for them to complete
	 */

	if (vc->gni_ep != NULL) {
		while (status == GNI_RC_NOT_DONE) {
			fastlock_acquire(&nic->lock);
			status = GNI_EpUnbind(vc->gni_ep);
			fastlock_release(&nic->lock);

			if ((status != GNI_RC_NOT_DONE) &&
			    (status != GNI_RC_SUCCESS)) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					  "GNI_EpUnbind returned %s\n",
					  gni_err_str[status]);
				break;
			}

			if (status == GNI_RC_NOT_DONE)
				_gnix_nic_progress(nic);
		}

		fastlock_acquire(&nic->lock);
		status = GNI_EpDestroy(vc->gni_ep);
		fastlock_release(&nic->lock);

		if (status != GNI_RC_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "GNI_EpDestroy returned %s\n",
				  gni_err_str[status]);
	}

	/*
	 * if the vc is in a nic's work queue, remove it
	 */
	__gnix_vc_cancel(vc);

	/*
	 * We may eventually want to check the state of the VC, if we
	 * implement true VC shutdown.

	if ((vc->conn_state != GNIX_VC_CONN_NONE) &&
	    (vc->conn_state != GNIX_VC_CONN_TERMINATED)) {
		GNIX_WARN(FI_LOG_EP_CTRL, "vc conn state %d\n",
			  vc->conn_state);
		GNIX_WARN(FI_LOG_EP_CTRL, "vc conn state error\n");
		return -FI_EBUSY;
	}
	 */

	/*
	 * if send_q not empty, return -FI_EBUSY
	 * Note for FI_EP_MSG type eps, this behavior
	 * may not be correct for handling fi_shutdown.
	 */

	if (!slist_empty(&vc->tx_queue)) {
		GNIX_WARN(FI_LOG_EP_CTRL, "vc sendqueue not empty\n");
		return -FI_EBUSY;
	}

	fastlock_destroy(&vc->tx_queue_lock);

	if (vc->smsg_mbox != NULL) {
		ret = _gnix_mbox_free(vc->smsg_mbox);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_mbox_free returned %s\n",
				  fi_strerror(-ret));
		vc->smsg_mbox = NULL;
	}

	if (vc->dgram != NULL) {
		ret = _gnix_dgram_free(vc->dgram);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				  "_gnix_dgram_free returned %s\n",
				  fi_strerror(-ret));
		vc->dgram = NULL;
	}

	ret = _gnix_nic_free_rem_id(nic, vc->vc_id);
	if (ret != FI_SUCCESS)
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gnix_nic_free_rem_id returned %s\n",
			  fi_strerror(-ret));

	_gnix_free_bitmap(&vc->flags);

	free(vc);

	return ret;
}
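/*
 * Illustrative only: a minimal sketch of how a caller might drive
 * _gnix_vc_destroy() when the VC still has queued transmits, since the
 * function returns -FI_EBUSY if tx_queue is non-empty. The helper name
 * and the retry bound are assumptions, not taken from the surrounding
 * source.
 */
static int __example_teardown_vc(struct gnix_vc *vc)
{
	int ret, tries = 1000;

	do {
		ret = _gnix_vc_destroy(vc);
		if (ret != -FI_EBUSY)
			break;	/* success (vc freed) or hard error */

		/* tx_queue not yet drained; progress the nic and retry */
		_gnix_nic_progress(vc->ep->nic);
	} while (--tries > 0);

	return ret;
}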
ssize_t _gnix_atomic(struct gnix_fid_ep *ep,
		     enum gnix_fab_req_type fr_type,
		     const struct fi_msg_atomic *msg,
		     const struct fi_ioc *comparev,
		     void **compare_desc,
		     size_t compare_count,
		     struct fi_ioc *resultv,
		     void **result_desc,
		     size_t result_count,
		     uint64_t flags)
{
	struct gnix_vc *vc;
	struct gnix_fab_req *req;
	struct gnix_fid_mem_desc *md = NULL;
	int rc, len;
	struct fid_mr *auto_mr = NULL;
	void *mdesc = NULL;
	uint64_t compare_operand = 0;
	void *loc_addr = NULL;
	int dt_len, dt_align;
	int connected;

	/* validate inputs before they are dereferenced below */
	if (!ep || !msg || !msg->msg_iov ||
	    msg->msg_iov[0].count != 1 ||
	    msg->iov_count != GNIX_MAX_ATOMIC_IOV_LIMIT ||
	    !msg->rma_iov)
		return -FI_EINVAL;

	if (!(flags & FI_INJECT) && !ep->send_cq &&
	    (((fr_type == GNIX_FAB_RQ_AMO ||
	       fr_type == GNIX_FAB_RQ_NAMO_AX ||
	       fr_type == GNIX_FAB_RQ_NAMO_AX_S) &&
	      !ep->write_cntr) ||
	     ((fr_type == GNIX_FAB_RQ_FAMO ||
	       fr_type == GNIX_FAB_RQ_CAMO ||
	       fr_type == GNIX_FAB_RQ_NAMO_FAX ||
	       fr_type == GNIX_FAB_RQ_NAMO_FAX_S) &&
	      !ep->read_cntr))) {
		return -FI_ENOCQ;
	}

	/*
	 * see fi_atomic man page
	 */
	if ((msg->op != FI_ATOMIC_READ) && !msg->msg_iov[0].addr)
		return -FI_EINVAL;

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trigger_context =
				(struct fi_triggered_context *)msg->context;
		if ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) ||
		    (flags & FI_INJECT)) {
			return -FI_EINVAL;
		}
	}

	if (fr_type == GNIX_FAB_RQ_CAMO) {
		if (!comparev || !comparev[0].addr || compare_count != 1)
			return -FI_EINVAL;

		compare_operand = *(uint64_t *)comparev[0].addr;
	}

	dt_len = ofi_datatype_size(msg->datatype);
	dt_align = dt_len - 1;
	len = dt_len * msg->msg_iov->count;

	if (msg->rma_iov->addr & dt_align) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "Invalid target alignment: %lx (mask 0x%x)\n",
			  msg->rma_iov->addr, dt_align);
		return -FI_EINVAL;
	}

	/* need a memory descriptor for all fetching and comparison AMOs */
	if (fr_type == GNIX_FAB_RQ_FAMO ||
	    fr_type == GNIX_FAB_RQ_CAMO ||
	    fr_type == GNIX_FAB_RQ_NAMO_FAX ||
	    fr_type == GNIX_FAB_RQ_NAMO_FAX_S) {
		if (!resultv || !resultv[0].addr || result_count != 1)
			return -FI_EINVAL;

		loc_addr = resultv[0].addr;

		if ((uint64_t)loc_addr & dt_align) {
			GNIX_INFO(FI_LOG_EP_DATA,
				  "Invalid source alignment: %p (mask 0x%x)\n",
				  loc_addr, dt_align);
			return -FI_EINVAL;
		}

		if (!result_desc || !result_desc[0]) {
			rc = _gnix_mr_reg(&ep->domain->domain_fid.fid,
					  loc_addr, len, FI_READ | FI_WRITE,
					  0, 0, 0, &auto_mr, NULL,
					  ep->auth_key, GNIX_PROV_REG);
			if (rc != FI_SUCCESS) {
				GNIX_INFO(FI_LOG_EP_DATA,
					  "Failed to auto-register local buffer: %d\n",
					  rc);
				return rc;
			}
			flags |= FI_LOCAL_MR;
			mdesc = (void *)auto_mr;
			GNIX_INFO(FI_LOG_EP_DATA, "auto-reg MR: %p\n",
				  auto_mr);
		} else {
			mdesc = result_desc[0];
		}
	}

	/* setup fabric request */
	req = _gnix_fr_alloc(ep);
	if (!req) {
		GNIX_INFO(FI_LOG_EP_DATA, "_gnix_fr_alloc() failed\n");
		rc = -FI_ENOSPC;
		goto err_fr_alloc;
	}

	req->type = fr_type;
	req->gnix_ep = ep;
	req->user_context = msg->context;
	req->work_fn = _gnix_amo_post_req;

	if (mdesc)
		md = container_of(mdesc, struct gnix_fid_mem_desc, mr_fid);

	req->amo.loc_md = (void *)md;
	req->amo.loc_addr = (uint64_t)loc_addr;

	if ((fr_type == GNIX_FAB_RQ_NAMO_AX) ||
	    (fr_type == GNIX_FAB_RQ_NAMO_FAX) ||
	    (fr_type == GNIX_FAB_RQ_NAMO_AX_S) ||
	    (fr_type == GNIX_FAB_RQ_NAMO_FAX_S)) {
		req->amo.first_operand =
			*(uint64_t *)msg->msg_iov[0].addr;
		req->amo.second_operand =
			*((uint64_t *)(msg->msg_iov[0].addr) + 1);
	} else if (msg->op == FI_ATOMIC_READ) {
		req->amo.first_operand =
			0xFFFFFFFFFFFFFFFF; /* operand to FAND */
	} else if (msg->op == FI_CSWAP) {
		req->amo.first_operand = compare_operand;
		req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr;
	} else if (msg->op == FI_MSWAP) {
		req->amo.first_operand = ~compare_operand;
		req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr;
		req->amo.second_operand &= compare_operand;
	} else {
		req->amo.first_operand = *(uint64_t *)msg->msg_iov[0].addr;
	}

	req->amo.rem_addr = msg->rma_iov->addr;
	req->amo.rem_mr_key = msg->rma_iov->key;
	req->amo.len = len;
	req->amo.imm = msg->data;
	req->amo.datatype = msg->datatype;
	req->amo.op = msg->op;
	req->flags = flags;

	/*
	 * Inject interfaces always suppress completions. If
	 * SELECTIVE_COMPLETION is set, honor any setting. Otherwise, always
	 * deliver a completion.
	 */
	if ((flags & GNIX_SUPPRESS_COMPLETION) ||
	    (ep->send_selective_completion && !(flags & FI_COMPLETION))) {
		req->flags &= ~FI_COMPLETION;
	} else {
		req->flags |= FI_COMPLETION;
	}

	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);

	/* find VC for target */
	rc = _gnix_vc_ep_get_vc(ep, msg->addr, &vc);
	if (rc) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "_gnix_vc_ep_get_vc() failed, addr: %lx, rc: %d\n",
			  msg->addr, rc);
		goto err_get_vc;
	}

	req->vc = vc;

	rc = _gnix_vc_queue_tx_req(req);
	connected = (vc->conn_state == GNIX_VC_CONNECTED);

	COND_RELEASE(ep->requires_lock, &ep->vc_lock);

	/*
	 * If a new VC was allocated, progress CM before returning.
	 * If the VC is connected and there's a backlog, poke
	 * the nic progress engine before returning.
	 */
	if (!connected) {
		_gnix_cm_nic_progress(ep->cm_nic);
	} else if (!dlist_empty(&vc->tx_queue)) {
		_gnix_nic_progress(vc->ep->nic);
	}

	return rc;

err_get_vc:
	COND_RELEASE(ep->requires_lock, &ep->vc_lock);
err_fr_alloc:
	if (auto_mr) {
		fi_close(&auto_mr->fid);
	}
	return rc;
}
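/*
 * Worked example of the alignment test used in _gnix_atomic() above:
 * for a datatype of size dt_len, (dt_len - 1) is a mask of the
 * low-order address bits, and an address is naturally aligned iff
 * (addr & mask) == 0. Standalone illustration under assumed example
 * values; not part of the surrounding source.
 */
#include <assert.h>
#include <stdint.h>

static void __example_check_alignment(void)
{
	int dt_len = 4;			/* e.g., a 4-byte datatype */
	int dt_align = dt_len - 1;	/* mask 0x3 */

	assert((UINT64_C(0x1000) & dt_align) == 0);	/* aligned */
	assert((UINT64_C(0x1002) & dt_align) != 0);	/* misaligned */
}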