예제 #1
0
/*
 * this function is intended to be invoked as an argument to pthread_create,
 */
static void *__gnix_nic_prog_thread_fn(void *the_arg)
{
	int ret = FI_SUCCESS, prev_state;
	int retry = 0;
	uint32_t which;
	struct gnix_nic *nic = (struct gnix_nic *)the_arg;
	sigset_t  sigmask;
	gni_cq_handle_t cqv[2];
	gni_return_t status;
	gni_cq_entry_t cqe;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	/*
	 * temporarily disable cancelability while we set up
	 * some stuff
	 */

	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &prev_state);

	/*
	 * help out Cray core-spec, say we're not an app thread
	 * and can be run on core-spec cpus.
	 */

	ret = _gnix_task_is_not_app();
	if (ret)
		GNIX_WARN(FI_LOG_EP_CTRL,
			"_gnix_task_is_not_app call returned %d\n",
			ret);

	/*
	 * block all signals, don't want this thread to catch
	 * signals that may be for app threads
	 */

	memset(&sigmask, 0, sizeof(sigset_t));
	ret = sigfillset(&sigmask);
	if (ret) {
		GNIX_WARN(FI_LOG_EP_CTRL,
		"sigfillset call returned %d\n", ret);
	} else {

		ret = pthread_sigmask(SIG_SETMASK,
					&sigmask, NULL);
		if (ret)
			GNIX_WARN(FI_LOG_EP_CTRL,
			"pthread_sigmask call returned %d\n", ret);
	}

	/*
	 * okay now we're ready to be cancelable.
	 */

	pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &prev_state);

	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	cqv[0] = nic->tx_cq_blk;
	cqv[1] = nic->rx_cq_blk;

try_again:
	status = GNI_CqVectorMonitor(cqv,
				     2,
				     -1,
				     &which);

	switch (status) {
	case GNI_RC_SUCCESS:

		/*
		 * first dequeue RX CQEs
		 */
		if (which == 1) {
			do {
				status = GNI_CqGetEvent(nic->rx_cq_blk,
							&cqe);
			} while (status == GNI_RC_SUCCESS);
		}
		_gnix_nic_progress(nic);
		retry = 1;
		break;
	case GNI_RC_TIMEOUT:
		retry = 1;
		break;
	case GNI_RC_NOT_DONE:
		retry = 1;
		break;
	case GNI_RC_INVALID_PARAM:
	case GNI_RC_INVALID_STATE:
	case GNI_RC_ERROR_RESOURCE:
	case GNI_RC_ERROR_NOMEM:
		retry = 0;
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_CqGetEvent returned %s\n",
			  gni_err_str[status]);
		break;
	default:
		retry = 0;
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "GNI_CqGetEvent returned unexpected code %s\n",
			  gni_err_str[status]);
		break;
	}

	if (retry)
		goto try_again;

	return NULL;
}
예제 #2
0
/* Destroy an unconnected VC.  More Support is needed to shutdown and destroy
 * an active VC. */
int _gnix_vc_destroy(struct gnix_vc *vc)
{
	int ret = FI_SUCCESS;
	struct gnix_nic *nic = NULL;
	gni_return_t status;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if (vc->ep == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "ep null\n");
		return -FI_EINVAL;
	}

	nic = vc->ep->nic;
	if (nic == NULL) {
		GNIX_WARN(FI_LOG_EP_CTRL, "ep nic null for vc %p\n", vc);
		return -FI_EINVAL;
	}

	/*
	 * move vc state to terminating
	 */

	vc->conn_state = GNIX_VC_CONN_TERMINATING;

	/*
	 * try to unbind the gni_ep if non-NULL.
	 * If there are SMSG or PostFMA/RDMA outstanding
	 * wait here for them to complete
	 */

	if (vc->gni_ep != NULL) {
		while (status == GNI_RC_NOT_DONE) {

			fastlock_acquire(&nic->lock);
			status = GNI_EpUnbind(vc->gni_ep);
			fastlock_release(&nic->lock);

			if ((status != GNI_RC_NOT_DONE) &&
				(status != GNI_RC_SUCCESS)) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					"GNI_EpUnBind returned %s\n",
					  gni_err_str[status]);
				break;
			}

			if (status == GNI_RC_NOT_DONE)
				_gnix_nic_progress(nic);
		}
		fastlock_acquire(&nic->lock);
		status = GNI_EpDestroy(vc->gni_ep);
		fastlock_release(&nic->lock);
		if (status != GNI_RC_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
				"GNI_EpDestroy returned %s\n",
				  gni_err_str[status]);
	}

	/*
	 * if the vc is in a nic's work queue, remove it
	 */
	__gnix_vc_cancel(vc);

	/*
	 * We may eventually want to check the state of the VC, if we
	 * implement true VC shutdown.

	if ((vc->conn_state != GNIX_VC_CONN_NONE)
		&& (vc->conn_state != GNIX_VC_CONN_TERMINATED)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			      "vc conn state  %d\n",
			       vc->conn_state);
		GNIX_WARN(FI_LOG_EP_CTRL, "vc conn state error\n");
		return -FI_EBUSY;
	}
	 */

	/*
	 * if send_q not empty, return -FI_EBUSY
	 * Note for FI_EP_MSG type eps, this behavior
	 * may not be correct for handling fi_shutdown.
	 */

	if (!slist_empty(&vc->tx_queue)) {
		GNIX_WARN(FI_LOG_EP_CTRL, "vc sendqueue not empty\n");
		return -FI_EBUSY;
	}

	fastlock_destroy(&vc->tx_queue_lock);

	if (vc->smsg_mbox != NULL) {
		ret = _gnix_mbox_free(vc->smsg_mbox);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
			      "_gnix_mbox_free returned %s\n",
			      fi_strerror(-ret));
		vc->smsg_mbox = NULL;
	}

	if (vc->dgram != NULL) {
		ret = _gnix_dgram_free(vc->dgram);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_CTRL,
			      "_gnix_dgram_free returned %s\n",
			      fi_strerror(-ret));
		vc->dgram = NULL;
	}

	ret = _gnix_nic_free_rem_id(nic, vc->vc_id);
	if (ret != FI_SUCCESS)
		GNIX_WARN(FI_LOG_EP_CTRL,
		      "__gnix_vc_free_id returned %s\n",
		      fi_strerror(-ret));

	_gnix_free_bitmap(&vc->flags);

	free(vc);

	return ret;
}
예제 #3
0
ssize_t _gnix_atomic(struct gnix_fid_ep *ep,
		     enum gnix_fab_req_type fr_type,
		     const struct fi_msg_atomic *msg,
		     const struct fi_ioc *comparev,
		     void **compare_desc,
		     size_t compare_count,
		     struct fi_ioc *resultv,
		     void **result_desc,
		     size_t result_count,
		     uint64_t flags)
{
	struct gnix_vc *vc;
	struct gnix_fab_req *req;
	struct gnix_fid_mem_desc *md = NULL;
	int rc, len;
	struct fid_mr *auto_mr = NULL;
	void *mdesc = NULL;
	uint64_t compare_operand = 0;
	void *loc_addr = NULL;
	int dt_len, dt_align;
	int connected;

	if (!(flags & FI_INJECT) && !ep->send_cq &&
	    (((fr_type == GNIX_FAB_RQ_AMO ||
	      fr_type == GNIX_FAB_RQ_NAMO_AX ||
	      fr_type == GNIX_FAB_RQ_NAMO_AX_S) &&
	      !ep->write_cntr) ||
	     ((fr_type == GNIX_FAB_RQ_FAMO ||
	      fr_type == GNIX_FAB_RQ_CAMO ||
	      fr_type == GNIX_FAB_RQ_NAMO_FAX ||
	      fr_type == GNIX_FAB_RQ_NAMO_FAX_S) &&
	      !ep->read_cntr))) {
		return -FI_ENOCQ;
	}


	if (!ep || !msg || !msg->msg_iov ||
	    msg->msg_iov[0].count != 1 ||
	    msg->iov_count != GNIX_MAX_ATOMIC_IOV_LIMIT ||
	    !msg->rma_iov)
		return -FI_EINVAL;

	/*
	 * see fi_atomic man page
	 */

	if ((msg->op != FI_ATOMIC_READ) &&
		!msg->msg_iov[0].addr)
		return -FI_EINVAL;

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trigger_context =
				(struct fi_triggered_context *)msg->context;
		if ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) ||
		    (flags & FI_INJECT)) {
			return -FI_EINVAL;
		}
	}

	if (fr_type == GNIX_FAB_RQ_CAMO) {
		if (!comparev || !comparev[0].addr || compare_count != 1)
			return -FI_EINVAL;

		compare_operand = *(uint64_t *)comparev[0].addr;
	}

	dt_len = ofi_datatype_size(msg->datatype);
	dt_align = dt_len - 1;
	len = dt_len * msg->msg_iov->count;

	if (msg->rma_iov->addr & dt_align) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "Invalid target alignment: %d (mask 0x%x)\n",
			  msg->rma_iov->addr, dt_align);
		return -FI_EINVAL;
	}

	/* need a memory descriptor for all fetching and comparison AMOs */
	if (fr_type == GNIX_FAB_RQ_FAMO ||
	    fr_type == GNIX_FAB_RQ_CAMO ||
	    fr_type == GNIX_FAB_RQ_NAMO_FAX ||
	    fr_type == GNIX_FAB_RQ_NAMO_FAX_S) {
		if (!resultv || !resultv[0].addr || result_count != 1)
			return -FI_EINVAL;

		loc_addr = resultv[0].addr;

		if ((uint64_t)loc_addr & dt_align) {
			GNIX_INFO(FI_LOG_EP_DATA,
				  "Invalid source alignment: %d (mask 0x%x)\n",
				  loc_addr, dt_align);
			return -FI_EINVAL;
		}

		if (!result_desc || !result_desc[0]) {
			rc = _gnix_mr_reg(&ep->domain->domain_fid.fid,
					 loc_addr, len, FI_READ | FI_WRITE,
					 0, 0, 0, &auto_mr,
					 NULL, ep->auth_key, GNIX_PROV_REG);
			if (rc != FI_SUCCESS) {
				GNIX_INFO(FI_LOG_EP_DATA,
					  "Failed to auto-register local buffer: %d\n",
					  rc);
				return rc;
			}
			flags |= FI_LOCAL_MR;
			mdesc = (void *)auto_mr;
			GNIX_INFO(FI_LOG_EP_DATA, "auto-reg MR: %p\n",
				  auto_mr);
		} else {
			mdesc = result_desc[0];
		}
	}

	/* setup fabric request */
	req = _gnix_fr_alloc(ep);
	if (!req) {
		GNIX_INFO(FI_LOG_EP_DATA, "_gnix_fr_alloc() failed\n");
		rc = -FI_ENOSPC;
		goto err_fr_alloc;
	}

	req->type = fr_type;
	req->gnix_ep = ep;
	req->user_context = msg->context;
	req->work_fn = _gnix_amo_post_req;

	if (mdesc) {
		md = container_of(mdesc, struct gnix_fid_mem_desc, mr_fid);
	}
	req->amo.loc_md = (void *)md;
	req->amo.loc_addr = (uint64_t)loc_addr;

	if ((fr_type == GNIX_FAB_RQ_NAMO_AX)   ||
	    (fr_type == GNIX_FAB_RQ_NAMO_FAX)  ||
	    (fr_type == GNIX_FAB_RQ_NAMO_AX_S) ||
	    (fr_type == GNIX_FAB_RQ_NAMO_FAX_S)) {
		req->amo.first_operand =
			*(uint64_t *)msg->msg_iov[0].addr;
		req->amo.second_operand =
			*((uint64_t *)(msg->msg_iov[0].addr) + 1);
	} else if (msg->op == FI_ATOMIC_READ) {
		req->amo.first_operand = 0xFFFFFFFFFFFFFFFF; /* operand to FAND */
	} else if (msg->op == FI_CSWAP) {
		req->amo.first_operand = compare_operand;
		req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr;
	} else if (msg->op == FI_MSWAP) {
		req->amo.first_operand = ~compare_operand;
		req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr;
		req->amo.second_operand &= compare_operand;
	} else {
		req->amo.first_operand = *(uint64_t *)msg->msg_iov[0].addr;
	}

	req->amo.rem_addr = msg->rma_iov->addr;
	req->amo.rem_mr_key = msg->rma_iov->key;
	req->amo.len = len;
	req->amo.imm = msg->data;
	req->amo.datatype = msg->datatype;
	req->amo.op = msg->op;
	req->flags = flags;

	/* Inject interfaces always suppress completions.  If
	 * SELECTIVE_COMPLETION is set, honor any setting.  Otherwise, always
	 * deliver a completion. */
	if ((flags & GNIX_SUPPRESS_COMPLETION) ||
	    (ep->send_selective_completion && !(flags & FI_COMPLETION))) {
		req->flags &= ~FI_COMPLETION;
	} else {
		req->flags |= FI_COMPLETION;
	}

	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);

	/* find VC for target */
	rc = _gnix_vc_ep_get_vc(ep, msg->addr, &vc);
	if (rc) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "_gnix_vc_ep_get_vc() failed, addr: %lx, rc:\n",
			  msg->addr, rc);
		goto err_get_vc;
	}

	req->vc = vc;

	rc = _gnix_vc_queue_tx_req(req);
	connected = (vc->conn_state == GNIX_VC_CONNECTED);

	COND_RELEASE(ep->requires_lock, &ep->vc_lock);

	/*
	 *If a new VC was allocated, progress CM before returning.
	 * If the VC is connected and there's a backlog, poke
	 * the nic progress engine befure returning.
	 */
	if (!connected) {
		_gnix_cm_nic_progress(ep->cm_nic);
	} else if (!dlist_empty(&vc->tx_queue)) {
		_gnix_nic_progress(vc->ep->nic);
	}

	return rc;

err_get_vc:
	COND_RELEASE(ep->requires_lock, &ep->vc_lock);
err_fr_alloc:
	if (auto_mr) {
		fi_close(&auto_mr->fid);
	}
	return rc;
}