예제 #1
0
/**
 * Destroy a slab.
 *
 * @param[in] handle	Handle to the allocator being used.
 * @param[in] slab	Slab to be destroyed.
 *
 * @return FI_SUCCESS	On successful slab destruction.
 *
 * @return -FI_EINVAL	On invalid handle or slab being given as parameters.
 */
static int __destroy_slab(struct gnix_mbox_alloc_handle *handle,
			  struct gnix_slab *slab)
{
	size_t total_size;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	if (!handle || !slab) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Invalid argument handle or slab.\n");
		return -FI_EINVAL;
	}

	total_size = handle->page_size * __page_count(handle);

	_gnix_free_bitmap(slab->used);
	free(slab->used);

	COND_ACQUIRE(handle->nic_handle->requires_lock, &handle->nic_handle->lock);
	GNI_MemDeregister(handle->nic_handle->gni_nic_hndl,
			  &slab->memory_handle);
	COND_RELEASE(handle->nic_handle->requires_lock, &handle->nic_handle->lock);

	munmap(slab->base, total_size);

	free(slab);

	return FI_SUCCESS;
}
예제 #2
0
static int __gnix_deregister_region(
		void *handle,
		void *context)
{
	struct gnix_fid_mem_desc *mr = (struct gnix_fid_mem_desc *) handle;
	gni_return_t ret;
	struct gnix_fid_domain *domain;
	struct gnix_nic *nic;

	domain = mr->domain;
	nic = mr->nic;

	COND_ACQUIRE(nic->requires_lock, &nic->lock);
	ret = GNI_MemDeregister(nic->gni_nic_hndl, &mr->mem_hndl);
	COND_RELEASE(nic->requires_lock, &nic->lock);
	if (ret == GNI_RC_SUCCESS) {
		/* release reference to domain */
		_gnix_ref_put(domain);

		/* release reference to nic */
		_gnix_ref_put(nic);
	} else {
		GNIX_INFO(FI_LOG_MR, "failed to deregister memory"
			  " region, entry=%p ret=%i\n", handle, ret);
	}

	return ret;
}
예제 #3
0
DIRECT_FN STATIC int gnix_shutdown(struct fid_ep *ep, uint64_t flags)
{
	int ret;
	struct gnix_fid_ep *ep_priv;
	struct fi_eq_cm_entry eq_entry = {0};

	if (!ep)
		return -FI_EINVAL;

	ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid.fid);

	COND_ACQUIRE(ep_priv->requires_lock, &ep_priv->vc_lock);

	eq_entry.fid = &ep_priv->ep_fid.fid;

	ret = fi_eq_write(&ep_priv->eq->eq_fid, FI_SHUTDOWN, &eq_entry,
			  sizeof(eq_entry), 0);
	if (ret != sizeof(eq_entry)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "fi_eq_write failed, err: %d\n", ret);
	} else {
		ret = FI_SUCCESS;
	}

	COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock);

	return ret;
}
예제 #4
0
static int __gnix_amo_send_cntr_req(void *arg)
{
	struct gnix_fab_req *req = (struct gnix_fab_req *)arg;
	struct gnix_fid_ep *ep = req->gnix_ep;
	struct gnix_nic *nic = ep->nic;
	struct gnix_tx_descriptor *txd;
	gni_return_t status;
	int rc;
	int inject_err = _gnix_req_inject_err(req);

	rc = _gnix_nic_tx_alloc(nic, &txd);
	if (rc) {
		GNIX_INFO(FI_LOG_EP_DATA,
				"_gnix_nic_tx_alloc() failed: %d\n",
				rc);
		return -FI_ENOSPC;
	}

	txd->req = req;
	txd->completer_fn = __gnix_amo_txd_cntr_complete;

	if (req->type == GNIX_FAB_RQ_AMO) {
		txd->amo_cntr_hdr.flags = FI_REMOTE_WRITE;
	} else {
		txd->amo_cntr_hdr.flags = FI_REMOTE_READ;
	}

	COND_ACQUIRE(nic->requires_lock, &nic->lock);
	if (inject_err) {
		_gnix_nic_txd_err_inject(nic, txd);
		status = GNI_RC_SUCCESS;
	} else {
		status = GNI_SmsgSendWTag(req->vc->gni_ep,
					  &txd->amo_cntr_hdr,
					  sizeof(txd->amo_cntr_hdr),
					  NULL, 0, txd->id,
					  GNIX_SMSG_T_AMO_CNTR);
	}
	COND_RELEASE(nic->requires_lock, &nic->lock);

	if (status == GNI_RC_NOT_DONE) {
		_gnix_nic_tx_free(nic, txd);
		GNIX_INFO(FI_LOG_EP_DATA,
			  "GNI_SmsgSendWTag returned %s\n",
			  gni_err_str[status]);
	} else if (status != GNI_RC_SUCCESS) {
		_gnix_nic_tx_free(nic, txd);
		GNIX_WARN(FI_LOG_EP_DATA,
			  "GNI_SmsgSendWTag returned %s\n",
			  gni_err_str[status]);
	} else {
		GNIX_INFO(FI_LOG_EP_DATA, "Sent RMA CQ data, req: %p\n", req);
	}

	return gnixu_to_fi_errno(status);
}
예제 #5
0
/* Check for a connection response on an FI_EP_MSG. */
int _gnix_ep_progress(struct gnix_fid_ep *ep)
{
	int ret, bytes_read;
	struct gnix_pep_sock_connresp resp;

	/* No lock, fast exit. */
	if (ep->conn_state != GNIX_EP_CONNECTING) {
		return FI_SUCCESS;
	}

	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);

	if (ep->conn_state != GNIX_EP_CONNECTING) {
		COND_RELEASE(ep->requires_lock, &ep->vc_lock);
		return FI_SUCCESS;
	}

	/* Check for a connection response. */
	bytes_read = read(ep->conn_fd, &resp, sizeof(resp));
	if (bytes_read >= 0) {
		if (bytes_read == sizeof(resp)) {
			/* Received response. */
			ret = __gnix_ep_connresp(ep, &resp);
			if (ret != FI_SUCCESS) {
				GNIX_WARN(FI_LOG_EP_CTRL,
					  "__gnix_pep_connreq failed, %d\n",
					  ret);
			}
		} else {
			GNIX_FATAL(FI_LOG_EP_CTRL,
				   "Unexpected read size: %d\n",
				   bytes_read);
		}
	} else if (errno != EAGAIN) {
		GNIX_WARN(FI_LOG_EP_CTRL, "Read error: %s\n", strerror(errno));
	}

	COND_RELEASE(ep->requires_lock, &ep->vc_lock);

	return FI_SUCCESS;
}
예제 #6
0
static int __nic_rx_progress(struct gnix_nic *nic)
{
	int ret = FI_SUCCESS;
	gni_return_t status = GNI_RC_NOT_DONE;
	gni_cq_entry_t cqe;

	status = GNI_CqTestEvent(nic->rx_cq);
	if (status == GNI_RC_NOT_DONE)
		return FI_SUCCESS;

	COND_ACQUIRE(nic->requires_lock, &nic->lock);

	do {
		status = GNI_CqGetEvent(nic->rx_cq, &cqe);
		if (unlikely(status == GNI_RC_NOT_DONE)) {
			ret = FI_SUCCESS;
			break;
		}

		if (likely(status == GNI_RC_SUCCESS)) {
			/* Find and schedule the associated VC. */
			ret = __process_rx_cqe(nic, cqe);
			if (ret != FI_SUCCESS) {
				GNIX_WARN(FI_LOG_EP_DATA,
					  "process_rx_cqe() failed: %d\n",
					  ret);
			}
		} else if (status == GNI_RC_ERROR_RESOURCE) {
			/* The remote CQ was overrun.  Events related to any VC
			 * could have been missed.  Schedule each VC to be sure
			 * all messages are processed. */
			assert(GNI_CQ_OVERRUN(cqe));
			__nic_rx_overrun(nic);
		} else {
			GNIX_WARN(FI_LOG_EP_DATA,
				  "GNI_CqGetEvent returned %s\n",
				  gni_err_str[status]);
			ret = gnixu_to_fi_errno(status);
			break;
		}
	} while (1);

	COND_RELEASE(nic->requires_lock, &nic->lock);

	return ret;
}
예제 #7
0
static int __nic_rx_overrun(struct gnix_nic *nic)
{
	int i, max_id, ret;
	struct gnix_vc *vc;
	gni_return_t status;
	gni_cq_entry_t cqe;

	GNIX_WARN(FI_LOG_EP_DATA, "\n");

	/* clear out the CQ */
	/*
	 * TODO:  really need to process CQEs better for error reporting,
	 * etc.
	 */
	while ((status = GNI_CqGetEvent(nic->rx_cq, &cqe)) == GNI_RC_SUCCESS);
	assert(status == GNI_RC_NOT_DONE);

	COND_ACQUIRE(nic->requires_lock, &nic->vc_id_lock);
	max_id = nic->vc_id_table_count;
	COND_RELEASE(nic->requires_lock, &nic->vc_id_lock);
	/*
	 * TODO: optimization would
	 * be to keep track of last time
	 * this happened and where smsg msgs.
	 * were found.
	 */
	for (i = 0; i < max_id; i++) {
		ret = _gnix_test_bit(&nic->vc_id_bitmap, i);
		if (ret) {
			vc = __gnix_nic_elem_by_rem_id(nic, i);
			ret = _gnix_vc_dequeue_smsg(vc);
			if (ret != FI_SUCCESS) {
				GNIX_WARN(FI_LOG_EP_DATA,
					  "_gnix_vc_dqueue_smsg returned %d\n",
					  ret);
			}
		}
	}

	return FI_SUCCESS;
}
예제 #8
0
/**
 * Create a slab from a handle and append to the slab list.
 *
 * @param[in] handle	Handle to the allocator being used.
 *
 * @return FI_SUCCESS	On successful slab creation.
 *
 * @return -FI_ENOMEM	if failure to allocate memory for slab or bitmap.
 * @return [Unspec]	if failure in alloc_bitmap. Will return error code from
 * alloc_bitmap.
 * @return [Unspec]	if failure in GNI_MemRegister. Converts gni_return_t
 * status code to FI_ERRNO value.
 */
static int __create_slab(struct gnix_mbox_alloc_handle *handle)
{
	struct gnix_slab *slab;
	gni_return_t status;
	char error_buf[256];
	char *error;
	size_t total_size;
	int ret;
	int vmdh_index = -1;
	int flags = GNI_MEM_READWRITE;
	struct gnix_auth_key *info;

	GNIX_TRACE(FI_LOG_EP_CTRL, "\n");

	slab = calloc(1, sizeof(*slab));
	if (!slab) {
		error = strerror_r(errno, error_buf, sizeof(error_buf));
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Error allocating slab: %s\n",
			  error);
		ret = -FI_ENOMEM;
		goto err_slab_calloc;
	}

	total_size = handle->page_size * __page_count(handle);
	GNIX_DEBUG(FI_LOG_EP_CTRL, "total_size requested for mmap: %zu.\n",
		   total_size);

	slab->used = calloc(1, sizeof(*(slab->used)));
	if (!slab->used) {
		error = strerror_r(errno, error_buf, sizeof(error_buf));
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Error allocating bitmap: %s\n",
			  error);
		ret = -FI_ENOMEM;
		goto err_bitmap_calloc;
	}

	slab->base = mmap(0, total_size, (PROT_READ | PROT_WRITE), MAP_SHARED,
			  handle->fd, handle->last_offset);
	if (slab->base == MAP_FAILED) {
		error = strerror_r(errno, error_buf, sizeof(error_buf));
		GNIX_WARN(FI_LOG_EP_CTRL, "%s\n", error);
		ret = -FI_ENOMEM;
		goto err_mmap;
	}

	ret = _gnix_alloc_bitmap(slab->used, __mbox_count(handle), NULL);
	if (ret) {
		GNIX_WARN(FI_LOG_EP_CTRL, "Error allocating bitmap.\n");
		goto err_alloc_bitmap;
	}

	COND_ACQUIRE(handle->nic_handle->requires_lock, &handle->nic_handle->lock);
	if (handle->nic_handle->using_vmdh) {
		info = _gnix_auth_key_lookup(GNIX_PROV_DEFAULT_AUTH_KEY,
				GNIX_PROV_DEFAULT_AUTH_KEYLEN);
		assert(info);

		if (!handle->nic_handle->mdd_resources_set) {
			/* check to see if the ptag registration limit was set
			 * yet or not -- becomes read-only after success */
			_gnix_auth_key_enable(info);

			status = GNI_SetMddResources(
				handle->nic_handle->gni_nic_hndl,
				(info->attr.prov_key_limit +
				 info->attr.user_key_limit));
			assert(status == GNI_RC_SUCCESS);

			handle->nic_handle->mdd_resources_set = 1;
		}

		vmdh_index = _gnix_get_next_reserved_key(info);
		if (vmdh_index <= 0) {
			GNIX_FATAL(FI_LOG_DOMAIN,
				"failed to get reserved key for mbox "
				"registration, rc=%d\n",
				vmdh_index);
		}
		flags |= GNI_MEM_USE_VMDH;
	}

	status = GNI_MemRegister(handle->nic_handle->gni_nic_hndl,
				 (uint64_t) slab->base, total_size,
				 handle->cq_handle,
				 flags, vmdh_index,
				 &slab->memory_handle);
	COND_RELEASE(handle->nic_handle->requires_lock, &handle->nic_handle->lock);
	if (status != GNI_RC_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL, "GNI_MemRegister failed: %s\n",
			  gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
		goto err_memregister;
	}

	slab->allocator = handle;

	gnix_slist_insert_tail(&slab->list_entry, &handle->slab_list);

	handle->last_offset += total_size;

	return ret;

err_memregister:
	_gnix_free_bitmap(slab->used);
err_alloc_bitmap:
	munmap(slab->base, total_size);
err_mmap:
	free(slab->used);
err_bitmap_calloc:
	free(slab);
err_slab_calloc:
	return ret;
}
예제 #9
0
파일: gnix_cq.c 프로젝트: ofiwg/libfabric
static int gnix_cq_set_wait(struct gnix_fid_cq *cq)
{
	int ret = FI_SUCCESS;

	GNIX_TRACE(FI_LOG_CQ, "\n");

	struct fi_wait_attr requested = {
		.wait_obj = cq->attr.wait_obj,
		.flags = 0
	};

	switch (cq->attr.wait_obj) {
	case FI_WAIT_UNSPEC:
	case FI_WAIT_FD:
	case FI_WAIT_MUTEX_COND:
		ret = gnix_wait_open(&cq->domain->fabric->fab_fid,
				&requested, &cq->wait);
		break;
	case FI_WAIT_SET:
		ret = _gnix_wait_set_add(cq->attr.wait_set, &cq->cq_fid.fid);
		if (!ret)
			cq->wait = cq->attr.wait_set;

		break;
	default:
		break;
	}

	return ret;
}

static void free_cq_entry(struct slist_entry *item)
{
	struct gnix_cq_entry *entry;

	entry = container_of(item, struct gnix_cq_entry, item);

	free(entry->the_entry);
	free(entry);
}

static struct slist_entry *alloc_cq_entry(size_t size)
{
	struct gnix_cq_entry *entry = malloc(sizeof(*entry));

	if (!entry) {
		GNIX_DEBUG(FI_LOG_CQ, "out of memory\n");
		goto err;
	}

	entry->the_entry = malloc(size);
	if (!entry->the_entry) {
		GNIX_DEBUG(FI_LOG_CQ, "out of memory\n");
		goto cleanup;
	}

	return &entry->item;

cleanup:
	free(entry);
err:
	return NULL;
}

static int __gnix_cq_progress(struct gnix_fid_cq *cq)
{
	return _gnix_prog_progress(&cq->pset);
}

/*******************************************************************************
 * Exposed helper functions
 ******************************************************************************/
ssize_t _gnix_cq_add_event(struct gnix_fid_cq *cq, struct gnix_fid_ep *ep,
			   void *op_context, uint64_t flags, size_t len,
			   void *buf, uint64_t data, uint64_t tag,
			   fi_addr_t src_addr)
{
	struct gnix_cq_entry *event;
	struct slist_entry *item;
	uint64_t mask;
	ssize_t ret = FI_SUCCESS;

	if (ep) {
		if (ep->info && ep->info->mode & FI_NOTIFY_FLAGS_ONLY) {
			mask = (FI_REMOTE_CQ_DATA | FI_MULTI_RECV);

			if (flags & FI_RMA_EVENT) {
				mask |= (FI_REMOTE_READ | FI_REMOTE_WRITE |
					 FI_RMA);
			}

			flags &= mask;
		}
	}

	COND_ACQUIRE(cq->requires_lock, &cq->lock);

	item = _gnix_queue_get_free(cq->events);
	if (!item) {
		GNIX_DEBUG(FI_LOG_CQ, "error creating cq_entry\n");
		ret = -FI_ENOMEM;
		goto err;
	}

	event = container_of(item, struct gnix_cq_entry, item);

	assert(event->the_entry);

	fill_function[cq->attr.format](event->the_entry, op_context, flags,
			len, buf, data, tag);
	event->src_addr = src_addr;

	_gnix_queue_enqueue(cq->events, &event->item);
	GNIX_DEBUG(FI_LOG_CQ, "Added event: %lx\n", op_context);

	if (cq->wait)
		_gnix_signal_wait_obj(cq->wait);

err:
	COND_RELEASE(cq->requires_lock, &cq->lock);

	return ret;
}

ssize_t _gnix_cq_add_error(struct gnix_fid_cq *cq, void *op_context,
			   uint64_t flags, size_t len, void *buf,
			   uint64_t data, uint64_t tag, size_t olen,
			   int err, int prov_errno, void *err_data,
			   size_t err_data_size)
{
	struct fi_cq_err_entry *error;
	struct gnix_cq_entry *event;
	struct slist_entry *item;

	ssize_t ret = FI_SUCCESS;

	GNIX_INFO(FI_LOG_CQ, "creating error event entry\n");


	COND_ACQUIRE(cq->requires_lock, &cq->lock);

	item = _gnix_queue_get_free(cq->errors);
	if (!item) {
		GNIX_WARN(FI_LOG_CQ, "error creating error entry\n");
		ret = -FI_ENOMEM;
		goto err;
	}

	event = container_of(item, struct gnix_cq_entry, item);

	error = event->the_entry;

	error->op_context = op_context;
	error->flags = flags;
	error->len = len;
	error->buf = buf;
	error->data = data;
	error->tag = tag;
	error->olen = olen;
	error->err = err;
	error->prov_errno = prov_errno;
	error->err_data = err_data;
	error->err_data_size = err_data_size;

	_gnix_queue_enqueue(cq->errors, &event->item);

	if (cq->wait)
		_gnix_signal_wait_obj(cq->wait);

err:
	COND_RELEASE(cq->requires_lock, &cq->lock);

	return ret;
}

int _gnix_cq_poll_obj_add(struct gnix_fid_cq *cq, void *obj,
			  int (*prog_fn)(void *data))
{
	return _gnix_prog_obj_add(&cq->pset, obj, prog_fn);
}

int _gnix_cq_poll_obj_rem(struct gnix_fid_cq *cq, void *obj,
			  int (*prog_fn)(void *data))
{
	return _gnix_prog_obj_rem(&cq->pset, obj, prog_fn);
}

static void __cq_destruct(void *obj)
{
	struct gnix_fid_cq *cq = (struct gnix_fid_cq *) obj;

	_gnix_ref_put(cq->domain);

	switch (cq->attr.wait_obj) {
	case FI_WAIT_NONE:
		break;
	case FI_WAIT_SET:
		_gnix_wait_set_remove(cq->wait, &cq->cq_fid.fid);
		break;
	case FI_WAIT_UNSPEC:
	case FI_WAIT_FD:
	case FI_WAIT_MUTEX_COND:
		assert(cq->wait);
		gnix_wait_close(&cq->wait->fid);
		break;
	default:
		GNIX_WARN(FI_LOG_CQ, "format: %d unsupported.\n",
			  cq->attr.wait_obj);
		break;
	}

	_gnix_prog_fini(&cq->pset);

	_gnix_queue_destroy(cq->events);
	_gnix_queue_destroy(cq->errors);

	fastlock_destroy(&cq->lock);
	free(cq->cq_fid.ops);
	free(cq->cq_fid.fid.ops);
	free(cq);
}

/*******************************************************************************
 * API functions.
 ******************************************************************************/
static int gnix_cq_close(fid_t fid)
{
	struct gnix_fid_cq *cq;
	int references_held;

	GNIX_TRACE(FI_LOG_CQ, "\n");

	cq = container_of(fid, struct gnix_fid_cq, cq_fid);

	references_held = _gnix_ref_put(cq);

	if (references_held) {
		GNIX_INFO(FI_LOG_CQ, "failed to fully close cq due to lingering "
				"references. references=%i cq=%p\n",
				references_held, cq);
	}

	return FI_SUCCESS;
}

static ssize_t __gnix_cq_readfrom(struct fid_cq *cq, void *buf,
					  size_t count, fi_addr_t *src_addr)
{
	struct gnix_fid_cq *cq_priv;
	struct gnix_cq_entry *event;
	struct slist_entry *temp;

	ssize_t read_count = 0;

	if (!cq || !buf || !count)
		return -FI_EINVAL;

	cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid);

	__gnix_cq_progress(cq_priv);

	if (_gnix_queue_peek(cq_priv->errors))
		return -FI_EAVAIL;

	COND_ACQUIRE(cq_priv->requires_lock, &cq_priv->lock);

	while (_gnix_queue_peek(cq_priv->events) && count--) {
		temp = _gnix_queue_dequeue(cq_priv->events);
		event = container_of(temp, struct gnix_cq_entry, item);

		assert(event->the_entry);
		memcpy(buf, event->the_entry, cq_priv->entry_size);
		if (src_addr)
			memcpy(&src_addr[read_count], &event->src_addr, sizeof(fi_addr_t));

		_gnix_queue_enqueue_free(cq_priv->events, &event->item);

		buf = (void *) ((uint8_t *) buf + cq_priv->entry_size);

		read_count++;
	}

	COND_RELEASE(cq_priv->requires_lock, &cq_priv->lock);

	return read_count ?: -FI_EAGAIN;
}

static ssize_t __gnix_cq_sreadfrom(int blocking, struct fid_cq *cq, void *buf,
				     size_t count, fi_addr_t *src_addr,
				     const void *cond, int timeout)
{
	struct gnix_fid_cq *cq_priv;

	cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid);
	if ((blocking && !cq_priv->wait) ||
	    (blocking && cq_priv->attr.wait_obj == FI_WAIT_SET))
		return -FI_EINVAL;

	if (_gnix_queue_peek(cq_priv->errors))
		return -FI_EAVAIL;

	if (cq_priv->wait)
		gnix_wait_wait((struct fid_wait *)cq_priv->wait, timeout);


	return __gnix_cq_readfrom(cq, buf, count, src_addr);

}

DIRECT_FN STATIC ssize_t gnix_cq_sreadfrom(struct fid_cq *cq, void *buf,
					   size_t count, fi_addr_t *src_addr,
					   const void *cond, int timeout)
{
	return __gnix_cq_sreadfrom(1, cq, buf, count, src_addr, cond, timeout);
}

DIRECT_FN STATIC ssize_t gnix_cq_read(struct fid_cq *cq,
				      void *buf,
				      size_t count)
{
	return __gnix_cq_sreadfrom(0, cq, buf, count, NULL, NULL, 0);
}

DIRECT_FN STATIC ssize_t gnix_cq_sread(struct fid_cq *cq, void *buf,
				       size_t count, const void *cond,
				       int timeout)
{
	return __gnix_cq_sreadfrom(1, cq, buf, count, NULL, cond, timeout);
}

DIRECT_FN STATIC ssize_t gnix_cq_readfrom(struct fid_cq *cq, void *buf,
					  size_t count, fi_addr_t *src_addr)
{
	return __gnix_cq_sreadfrom(0, cq, buf, count, src_addr, NULL, 0);
}

DIRECT_FN STATIC ssize_t gnix_cq_readerr(struct fid_cq *cq,
					 struct fi_cq_err_entry *buf,
					 uint64_t flags)
{
	struct gnix_fid_cq *cq_priv;
	struct gnix_cq_entry *event;
	struct slist_entry *entry;
	size_t err_data_cpylen;
	struct fi_cq_err_entry *gnix_cq_err;

	ssize_t read_count = 0;

	if (!cq || !buf)
		return -FI_EINVAL;

	cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid);

	/*
	 * we need to progress cq.  some apps may be only using
	 * cq to check for errors.
	 */

	_gnix_prog_progress(&cq_priv->pset);

	COND_ACQUIRE(cq_priv->requires_lock, &cq_priv->lock);

	entry = _gnix_queue_dequeue(cq_priv->errors);
	if (!entry) {
		read_count = -FI_EAGAIN;
		goto err;
	}

	event = container_of(entry, struct gnix_cq_entry, item);
	gnix_cq_err = event->the_entry;

	buf->op_context = gnix_cq_err->op_context;
	buf->flags = gnix_cq_err->flags;
	buf->len = gnix_cq_err->len;
	buf->buf = gnix_cq_err->buf;
	buf->data = gnix_cq_err->data;
	buf->tag = gnix_cq_err->tag;
	buf->olen = gnix_cq_err->olen;
	buf->err = gnix_cq_err->err;
	buf->prov_errno = gnix_cq_err->prov_errno;

	if (gnix_cq_err->err_data != NULL) {
		/*
		 * Note: If the api version is >= 1.5 then copy err_data into
		 * buf->err_data and copy at most buf->err_data_size.
		 * If buf->err_data_size is zero or the api version is < 1.5,
		 * use the old method of allocating space in provider.
		 */
		if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version,
		    FI_VERSION(1, 5)) || buf->err_data_size == 0) {
			err_data_cpylen = sizeof(cq_priv->err_data);

			memcpy(cq_priv->err_data, gnix_cq_err->err_data,
				err_data_cpylen);

			buf->err_data = cq_priv->err_data;
		} else {
			if (buf->err_data == NULL)
				return -FI_EINVAL;

			err_data_cpylen = MIN(buf->err_data_size,
						gnix_cq_err->err_data_size);
			memcpy(buf->err_data, gnix_cq_err->err_data, err_data_cpylen);
			buf->err_data_size = err_data_cpylen;
		}
		free(gnix_cq_err->err_data);
		gnix_cq_err->err_data = NULL;
	} else {
		if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version,
		    FI_VERSION(1, 5))) {
			buf->err_data = NULL;
		} else {
			buf->err_data_size = 0;
		}
	}

	_gnix_queue_enqueue_free(cq_priv->errors, &event->item);

	read_count++;

err:
	COND_RELEASE(cq_priv->requires_lock, &cq_priv->lock);

	return read_count;
}

DIRECT_FN STATIC const char *gnix_cq_strerror(struct fid_cq *cq, int prov_errno,
					      const void *prov_data, char *buf,
					      size_t len)
{
	return NULL;
}

DIRECT_FN STATIC int gnix_cq_signal(struct fid_cq *cq)
{
	struct gnix_fid_cq *cq_priv;

	cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid);

	if (cq_priv->wait)
		_gnix_signal_wait_obj(cq_priv->wait);

	return FI_SUCCESS;
}

static int gnix_cq_control(struct fid *cq, int command, void *arg)
{

	switch (command) {
	case FI_GETWAIT:
		return -FI_ENOSYS;
	default:
		return -FI_EINVAL;
	}
}


DIRECT_FN int gnix_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
			   struct fid_cq **cq, void *context)
{
	struct gnix_fid_domain *domain_priv;
	struct gnix_fid_cq *cq_priv;
	struct fi_ops_cq *cq_ops;
	struct fi_ops *fi_cq_ops;

	int ret = FI_SUCCESS;

	GNIX_TRACE(FI_LOG_CQ, "\n");

	cq_ops = calloc(1, sizeof(*cq_ops));
	if (!cq_ops) {
		return -FI_ENOMEM;
	}

	fi_cq_ops = calloc(1, sizeof(*fi_cq_ops));
	if (!fi_cq_ops) {
		ret = -FI_ENOMEM;
		goto free_cq_ops;
	}

	*cq_ops = gnix_cq_ops;
	*fi_cq_ops = gnix_cq_fi_ops;

	ret = verify_cq_attr(attr, cq_ops, fi_cq_ops);
	if (ret)
		goto free_fi_cq_ops;

	domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid);
	if (!domain_priv) {
		ret = -FI_EINVAL;
		goto free_fi_cq_ops;
	}

	cq_priv = calloc(1, sizeof(*cq_priv));
	if (!cq_priv) {
		ret = -FI_ENOMEM;
		goto free_fi_cq_ops;
	}

	cq_priv->requires_lock = (domain_priv->thread_model !=
			FI_THREAD_COMPLETION);

	cq_priv->domain = domain_priv;
	cq_priv->attr = *attr;

	_gnix_ref_init(&cq_priv->ref_cnt, 1, __cq_destruct);
	_gnix_ref_get(cq_priv->domain);

	_gnix_prog_init(&cq_priv->pset);

	cq_priv->cq_fid.fid.fclass = FI_CLASS_CQ;
	cq_priv->cq_fid.fid.context = context;
	cq_priv->cq_fid.fid.ops = fi_cq_ops;
	cq_priv->cq_fid.ops = cq_ops;

	/*
	 * Although we don't need to store entry_size since we're already
	 * storing the format, this might provide a performance benefit
	 * when allocating storage.
	 */
	cq_priv->entry_size = format_sizes[cq_priv->attr.format];

	fastlock_init(&cq_priv->lock);
	ret = gnix_cq_set_wait(cq_priv);
	if (ret)
		goto free_cq_priv;

	ret = _gnix_queue_create(&cq_priv->events, alloc_cq_entry,
				 free_cq_entry, cq_priv->entry_size,
				 cq_priv->attr.size);
	if (ret)
		goto free_cq_priv;

	ret = _gnix_queue_create(&cq_priv->errors, alloc_cq_entry,
				 free_cq_entry, sizeof(struct fi_cq_err_entry),
				 0);
	if (ret)
		goto free_gnix_queue;

	*cq = &cq_priv->cq_fid;
	return ret;

free_gnix_queue:
	_gnix_queue_destroy(cq_priv->events);
free_cq_priv:
	_gnix_ref_put(cq_priv->domain);
	fastlock_destroy(&cq_priv->lock);
	free(cq_priv);
free_fi_cq_ops:
	free(fi_cq_ops);
free_cq_ops:
	free(cq_ops);

	return ret;
}


/*******************************************************************************
 * FI_OPS_* data structures.
 ******************************************************************************/
static const struct fi_ops gnix_cq_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = gnix_cq_close,
	.bind = fi_no_bind,
	.control = gnix_cq_control,
	.ops_open = fi_no_ops_open
};

static const struct fi_ops_cq gnix_cq_ops = {
	.size = sizeof(struct fi_ops_cq),
	.read = gnix_cq_read,
	.readfrom = gnix_cq_readfrom,
	.readerr = gnix_cq_readerr,
	.sread = gnix_cq_sread,
	.sreadfrom = gnix_cq_sreadfrom,
	.signal = gnix_cq_signal,
	.strerror = gnix_cq_strerror
};
예제 #10
0
DIRECT_FN STATIC int gnix_accept(struct fid_ep *ep, const void *param,
				 size_t paramlen)
{
	int ret;
	struct gnix_vc *vc;
	struct gnix_fid_ep *ep_priv;
	struct gnix_pep_sock_conn *conn;
	struct gnix_pep_sock_connresp resp;
	struct fi_eq_cm_entry eq_entry, *eqe_ptr;
	struct gnix_mbox *mbox = NULL;
	struct gnix_av_addr_entry av_entry;

	if (!ep || (paramlen && !param) || paramlen > GNIX_CM_DATA_MAX_SIZE)
		return -FI_EINVAL;

	ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid.fid);

	COND_ACQUIRE(ep_priv->requires_lock, &ep_priv->vc_lock);

	/* Look up and unpack the connection request used to create this EP. */
	conn = (struct gnix_pep_sock_conn *)ep_priv->info->handle;
	if (!conn || conn->fid.fclass != FI_CLASS_CONNREQ) {
		ret = -FI_EINVAL;
		goto err_unlock;
	}

	/* Create new VC without CM data. */
	av_entry.gnix_addr = ep_priv->dest_addr.gnix_addr;
	av_entry.cm_nic_cdm_id = ep_priv->dest_addr.cm_nic_cdm_id;
	ret = _gnix_vc_alloc(ep_priv, &av_entry, &vc);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL, "Failed to create VC: %d\n", ret);
		goto err_unlock;
	}
	ep_priv->vc = vc;
	ep_priv->vc->peer_caps = conn->req.peer_caps;
	ep_priv->vc->peer_id = conn->req.vc_id;

	ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl, &mbox);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA, "_gnix_mbox_alloc returned %s\n",
			  fi_strerror(-ret));
		goto err_mbox_alloc;
	}
	vc->smsg_mbox = mbox;

	/* Initialize the GNI connection. */
	ret = _gnix_vc_smsg_init(vc, conn->req.vc_id,
				 &conn->req.vc_mbox_attr,
				 &conn->req.cq_irq_mdh);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "_gnix_vc_smsg_init returned %s\n",
			  fi_strerror(-ret));
		goto err_smsg_init;
	}

	vc->conn_state = GNIX_VC_CONNECTED;

	/* Send ACK with VC attrs to allow peer to initialize GNI connection. */
	resp.cmd = GNIX_PEP_SOCK_RESP_ACCEPT;

	resp.vc_id = vc->vc_id;
	resp.vc_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	resp.vc_mbox_attr.msg_buffer = mbox->base;
	resp.vc_mbox_attr.buff_size =  vc->ep->nic->mem_per_mbox;
	resp.vc_mbox_attr.mem_hndl = *mbox->memory_handle;
	resp.vc_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
	resp.vc_mbox_attr.mbox_maxcredit =
			ep_priv->domain->params.mbox_maxcredit;
	resp.vc_mbox_attr.msg_maxsize =
			ep_priv->domain->params.mbox_msg_maxsize;
	resp.cq_irq_mdh = ep_priv->nic->irq_mem_hndl;
	resp.peer_caps = ep_priv->caps;

	resp.cm_data_len = paramlen;
	if (paramlen) {
		eqe_ptr = (struct fi_eq_cm_entry *)resp.eqe_buf;
		memcpy(eqe_ptr->data, param, paramlen);
	}

	ret = write(conn->sock_fd, &resp, sizeof(resp));
	if (ret != sizeof(resp)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to send resp, errno: %d\n",
			  errno);
		ret = -FI_EIO;
		goto err_write;
	}

	/* Notify user that this side is connected. */
	eq_entry.fid = &ep_priv->ep_fid.fid;

	ret = fi_eq_write(&ep_priv->eq->eq_fid, FI_CONNECTED, &eq_entry,
			  sizeof(eq_entry), 0);
	if (ret != sizeof(eq_entry)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "fi_eq_write failed, err: %d\n", ret);
		goto err_eq_write;
	}

	/* Free the connection request. */
	free(conn);

	ep_priv->conn_state = GNIX_EP_CONNECTED;

	COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock);

	GNIX_DEBUG(FI_LOG_EP_CTRL, "Sent conn accept: %p\n", ep_priv);

	return FI_SUCCESS;

err_eq_write:
err_write:
err_smsg_init:
	_gnix_mbox_free(ep_priv->vc->smsg_mbox);
	ep_priv->vc->smsg_mbox = NULL;
err_mbox_alloc:
	_gnix_vc_destroy(ep_priv->vc);
	ep_priv->vc = NULL;
err_unlock:
	COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock);

	return ret;
}
예제 #11
0
DIRECT_FN STATIC int gnix_connect(struct fid_ep *ep, const void *addr,
				  const void *param, size_t paramlen)
{
	int ret;
	struct gnix_fid_ep *ep_priv;
	struct sockaddr_in saddr;
	struct gnix_pep_sock_connreq req;
	struct fi_eq_cm_entry *eqe_ptr;
	struct gnix_vc *vc;
	struct gnix_mbox *mbox = NULL;
	struct gnix_av_addr_entry av_entry;

	if (!ep || !addr || (paramlen && !param) ||
	    paramlen > GNIX_CM_DATA_MAX_SIZE)
		return -FI_EINVAL;

	ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid.fid);

	COND_ACQUIRE(ep_priv->requires_lock, &ep_priv->vc_lock);

	if (ep_priv->conn_state != GNIX_EP_UNCONNECTED) {
		ret = -FI_EINVAL;
		goto err_unlock;
	}

	ret = _gnix_pe_to_ip(addr, &saddr);
	if (ret != FI_SUCCESS) {
		GNIX_INFO(FI_LOG_EP_CTRL,
			  "Failed to translate gnix_ep_name to IP\n");
		goto err_unlock;
	}

	/* Create new VC without CM data. */
	av_entry.gnix_addr = ep_priv->dest_addr.gnix_addr;
	av_entry.cm_nic_cdm_id = ep_priv->dest_addr.cm_nic_cdm_id;
	ret = _gnix_vc_alloc(ep_priv, &av_entry, &vc);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to create VC:: %d\n",
			  ret);
		goto err_unlock;
	}
	ep_priv->vc = vc;

	ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl, &mbox);
	if (ret != FI_SUCCESS) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "_gnix_mbox_alloc returned %s\n",
			  fi_strerror(-ret));
		goto err_mbox_alloc;
	}
	vc->smsg_mbox = mbox;

	ep_priv->conn_fd = socket(AF_INET, SOCK_STREAM, 0);
	if (ep_priv->conn_fd < 0) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to create connect socket, errno: %d\n",
			  errno);
		ret = -FI_ENOSPC;
		goto err_socket;
	}

	/* Currently blocks until connected. */
	ret = connect(ep_priv->conn_fd, (struct sockaddr *)&saddr,
		      sizeof(saddr));
	if (ret) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to connect, errno: %d\n",
			  errno);
		ret = -FI_EIO;
		goto err_connect;
	}

	req.info = *ep_priv->info;

	/* Note addrs are swapped. */
	memcpy(&req.dest_addr, (void *)&ep_priv->src_addr,
	       sizeof(req.dest_addr));
	memcpy(&ep_priv->dest_addr, addr, sizeof(ep_priv->dest_addr));
	memcpy(&req.src_addr, addr, sizeof(req.src_addr));

	if (ep_priv->info->tx_attr)
		req.tx_attr = *ep_priv->info->tx_attr;
	if (ep_priv->info->rx_attr)
		req.rx_attr = *ep_priv->info->rx_attr;
	if (ep_priv->info->ep_attr)
		req.ep_attr = *ep_priv->info->ep_attr;
	if (ep_priv->info->domain_attr)
		req.domain_attr = *ep_priv->info->domain_attr;
	if (ep_priv->info->fabric_attr)
		req.fabric_attr = *ep_priv->info->fabric_attr;

	req.fabric_attr.fabric = NULL;
	req.domain_attr.domain = NULL;

	req.vc_id = vc->vc_id;
	req.vc_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
	req.vc_mbox_attr.msg_buffer = mbox->base;
	req.vc_mbox_attr.buff_size =  vc->ep->nic->mem_per_mbox;
	req.vc_mbox_attr.mem_hndl = *mbox->memory_handle;
	req.vc_mbox_attr.mbox_offset = (uint64_t)mbox->offset;
	req.vc_mbox_attr.mbox_maxcredit =
			ep_priv->domain->params.mbox_maxcredit;
	req.vc_mbox_attr.msg_maxsize = ep_priv->domain->params.mbox_msg_maxsize;
	req.cq_irq_mdh = ep_priv->nic->irq_mem_hndl;
	req.peer_caps = ep_priv->caps;

	req.cm_data_len = paramlen;
	if (paramlen) {
		eqe_ptr = (struct fi_eq_cm_entry *)req.eqe_buf;
		memcpy(eqe_ptr->data, param, paramlen);
	}

	ret = write(ep_priv->conn_fd, &req, sizeof(req));
	if (ret != sizeof(req)) {
		GNIX_WARN(FI_LOG_EP_CTRL,
			  "Failed to send req, errno: %d\n",
			  errno);
		ret = -FI_EIO;
		goto err_write;
	}
	/* set fd to non-blocking now since we can't block within the eq
	 * progress system
	 */
	fi_fd_nonblock(ep_priv->conn_fd);

	ep_priv->conn_state = GNIX_EP_CONNECTING;

	COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock);

	GNIX_DEBUG(FI_LOG_EP_CTRL, "Sent conn req: %p, %s\n",
		   ep_priv, inet_ntoa(saddr.sin_addr));

	return FI_SUCCESS;

err_write:
err_connect:
	close(ep_priv->conn_fd);
	ep_priv->conn_fd = -1;
err_socket:
	_gnix_mbox_free(ep_priv->vc->smsg_mbox);
	ep_priv->vc->smsg_mbox = NULL;
err_mbox_alloc:
	_gnix_vc_destroy(ep_priv->vc);
	ep_priv->vc = NULL;
err_unlock:
	COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock);

	return ret;
}
예제 #12
0
ssize_t _gnix_atomic(struct gnix_fid_ep *ep,
		     enum gnix_fab_req_type fr_type,
		     const struct fi_msg_atomic *msg,
		     const struct fi_ioc *comparev,
		     void **compare_desc,
		     size_t compare_count,
		     struct fi_ioc *resultv,
		     void **result_desc,
		     size_t result_count,
		     uint64_t flags)
{
	struct gnix_vc *vc;
	struct gnix_fab_req *req;
	struct gnix_fid_mem_desc *md = NULL;
	int rc, len;
	struct fid_mr *auto_mr = NULL;
	void *mdesc = NULL;
	uint64_t compare_operand = 0;
	void *loc_addr = NULL;
	int dt_len, dt_align;
	int connected;

	if (!(flags & FI_INJECT) && !ep->send_cq &&
	    (((fr_type == GNIX_FAB_RQ_AMO ||
	      fr_type == GNIX_FAB_RQ_NAMO_AX ||
	      fr_type == GNIX_FAB_RQ_NAMO_AX_S) &&
	      !ep->write_cntr) ||
	     ((fr_type == GNIX_FAB_RQ_FAMO ||
	      fr_type == GNIX_FAB_RQ_CAMO ||
	      fr_type == GNIX_FAB_RQ_NAMO_FAX ||
	      fr_type == GNIX_FAB_RQ_NAMO_FAX_S) &&
	      !ep->read_cntr))) {
		return -FI_ENOCQ;
	}


	if (!ep || !msg || !msg->msg_iov ||
	    msg->msg_iov[0].count != 1 ||
	    msg->iov_count != GNIX_MAX_ATOMIC_IOV_LIMIT ||
	    !msg->rma_iov)
		return -FI_EINVAL;

	/*
	 * see fi_atomic man page
	 */

	if ((msg->op != FI_ATOMIC_READ) &&
		!msg->msg_iov[0].addr)
		return -FI_EINVAL;

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trigger_context =
				(struct fi_triggered_context *)msg->context;
		if ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) ||
		    (flags & FI_INJECT)) {
			return -FI_EINVAL;
		}
	}

	if (fr_type == GNIX_FAB_RQ_CAMO) {
		if (!comparev || !comparev[0].addr || compare_count != 1)
			return -FI_EINVAL;

		compare_operand = *(uint64_t *)comparev[0].addr;
	}

	dt_len = ofi_datatype_size(msg->datatype);
	dt_align = dt_len - 1;
	len = dt_len * msg->msg_iov->count;

	if (msg->rma_iov->addr & dt_align) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "Invalid target alignment: %d (mask 0x%x)\n",
			  msg->rma_iov->addr, dt_align);
		return -FI_EINVAL;
	}

	/* need a memory descriptor for all fetching and comparison AMOs */
	if (fr_type == GNIX_FAB_RQ_FAMO ||
	    fr_type == GNIX_FAB_RQ_CAMO ||
	    fr_type == GNIX_FAB_RQ_NAMO_FAX ||
	    fr_type == GNIX_FAB_RQ_NAMO_FAX_S) {
		if (!resultv || !resultv[0].addr || result_count != 1)
			return -FI_EINVAL;

		loc_addr = resultv[0].addr;

		if ((uint64_t)loc_addr & dt_align) {
			GNIX_INFO(FI_LOG_EP_DATA,
				  "Invalid source alignment: %d (mask 0x%x)\n",
				  loc_addr, dt_align);
			return -FI_EINVAL;
		}

		if (!result_desc || !result_desc[0]) {
			rc = _gnix_mr_reg(&ep->domain->domain_fid.fid,
					 loc_addr, len, FI_READ | FI_WRITE,
					 0, 0, 0, &auto_mr,
					 NULL, ep->auth_key, GNIX_PROV_REG);
			if (rc != FI_SUCCESS) {
				GNIX_INFO(FI_LOG_EP_DATA,
					  "Failed to auto-register local buffer: %d\n",
					  rc);
				return rc;
			}
			flags |= FI_LOCAL_MR;
			mdesc = (void *)auto_mr;
			GNIX_INFO(FI_LOG_EP_DATA, "auto-reg MR: %p\n",
				  auto_mr);
		} else {
			mdesc = result_desc[0];
		}
	}

	/* setup fabric request */
	req = _gnix_fr_alloc(ep);
	if (!req) {
		GNIX_INFO(FI_LOG_EP_DATA, "_gnix_fr_alloc() failed\n");
		rc = -FI_ENOSPC;
		goto err_fr_alloc;
	}

	req->type = fr_type;
	req->gnix_ep = ep;
	req->user_context = msg->context;
	req->work_fn = _gnix_amo_post_req;

	if (mdesc) {
		md = container_of(mdesc, struct gnix_fid_mem_desc, mr_fid);
	}
	req->amo.loc_md = (void *)md;
	req->amo.loc_addr = (uint64_t)loc_addr;

	if ((fr_type == GNIX_FAB_RQ_NAMO_AX)   ||
	    (fr_type == GNIX_FAB_RQ_NAMO_FAX)  ||
	    (fr_type == GNIX_FAB_RQ_NAMO_AX_S) ||
	    (fr_type == GNIX_FAB_RQ_NAMO_FAX_S)) {
		req->amo.first_operand =
			*(uint64_t *)msg->msg_iov[0].addr;
		req->amo.second_operand =
			*((uint64_t *)(msg->msg_iov[0].addr) + 1);
	} else if (msg->op == FI_ATOMIC_READ) {
		req->amo.first_operand = 0xFFFFFFFFFFFFFFFF; /* operand to FAND */
	} else if (msg->op == FI_CSWAP) {
		req->amo.first_operand = compare_operand;
		req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr;
	} else if (msg->op == FI_MSWAP) {
		req->amo.first_operand = ~compare_operand;
		req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr;
		req->amo.second_operand &= compare_operand;
	} else {
		req->amo.first_operand = *(uint64_t *)msg->msg_iov[0].addr;
	}

	req->amo.rem_addr = msg->rma_iov->addr;
	req->amo.rem_mr_key = msg->rma_iov->key;
	req->amo.len = len;
	req->amo.imm = msg->data;
	req->amo.datatype = msg->datatype;
	req->amo.op = msg->op;
	req->flags = flags;

	/* Inject interfaces always suppress completions.  If
	 * SELECTIVE_COMPLETION is set, honor any setting.  Otherwise, always
	 * deliver a completion. */
	if ((flags & GNIX_SUPPRESS_COMPLETION) ||
	    (ep->send_selective_completion && !(flags & FI_COMPLETION))) {
		req->flags &= ~FI_COMPLETION;
	} else {
		req->flags |= FI_COMPLETION;
	}

	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);

	/* find VC for target */
	rc = _gnix_vc_ep_get_vc(ep, msg->addr, &vc);
	if (rc) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "_gnix_vc_ep_get_vc() failed, addr: %lx, rc:\n",
			  msg->addr, rc);
		goto err_get_vc;
	}

	req->vc = vc;

	rc = _gnix_vc_queue_tx_req(req);
	connected = (vc->conn_state == GNIX_VC_CONNECTED);

	COND_RELEASE(ep->requires_lock, &ep->vc_lock);

	/*
	 *If a new VC was allocated, progress CM before returning.
	 * If the VC is connected and there's a backlog, poke
	 * the nic progress engine befure returning.
	 */
	if (!connected) {
		_gnix_cm_nic_progress(ep->cm_nic);
	} else if (!dlist_empty(&vc->tx_queue)) {
		_gnix_nic_progress(vc->ep->nic);
	}

	return rc;

err_get_vc:
	COND_RELEASE(ep->requires_lock, &ep->vc_lock);
err_fr_alloc:
	if (auto_mr) {
		fi_close(&auto_mr->fid);
	}
	return rc;
}
예제 #13
0
int _gnix_amo_post_req(void *data)
{
	struct gnix_fab_req *fab_req = (struct gnix_fab_req *)data;
	struct gnix_fid_ep *ep = fab_req->gnix_ep;
	struct gnix_nic *nic = ep->nic;
	struct gnix_fid_mem_desc *loc_md;
	struct gnix_tx_descriptor *txd;
	gni_mem_handle_t mdh;
	gni_return_t status;
	int rc;
	int inject_err = _gnix_req_inject_err(fab_req);

	if (!gnix_ops_allowed(ep, fab_req->vc->peer_caps, fab_req->flags)) {
		GNIX_DEBUG(FI_LOG_EP_DATA, "flags:0x%llx, %s\n", fab_req->flags,
			   fi_tostr(&fab_req->flags, FI_TYPE_OP_FLAGS));
		GNIX_DEBUG(FI_LOG_EP_DATA, "caps:0x%llx, %s\n",
			   ep->caps, fi_tostr(&ep->caps, FI_TYPE_CAPS));
		GNIX_DEBUG(FI_LOG_EP_DATA, "peer_caps:0x%llx, %s\n",
			   fab_req->vc->peer_caps,
			   fi_tostr(&fab_req->vc->peer_caps, FI_TYPE_OP_FLAGS));

		rc = __gnix_amo_post_err(fab_req, FI_EOPNOTSUPP);
		if (rc != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_DATA,
				  "__gnix_amo_post_err() failed: %d\n", rc);
		return -FI_ECANCELED;
	}

	rc = _gnix_nic_tx_alloc(nic, &txd);
	if (rc) {
		GNIX_INFO(FI_LOG_EP_DATA, "_gnix_nic_tx_alloc() failed: %d\n",
			 rc);
		return -FI_ENOSPC;
	}

	txd->completer_fn = __gnix_amo_txd_complete;
	txd->req = fab_req;

	/* Mem handle CRC is not validated during FMA operations.  Skip this
	 * costly calculation. */
	_gnix_convert_key_to_mhdl_no_crc(
			(gnix_mr_key_t *)&fab_req->amo.rem_mr_key,
			&mdh);
	loc_md = (struct gnix_fid_mem_desc *)fab_req->amo.loc_md;

	txd->gni_desc.type = GNI_POST_AMO;
	txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; /* check flags */
	txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; /* check flags */
	txd->gni_desc.local_addr = (uint64_t)fab_req->amo.loc_addr;
	if (loc_md) {
		txd->gni_desc.local_mem_hndl = loc_md->mem_hndl;
	}
	txd->gni_desc.remote_addr = (uint64_t)fab_req->amo.rem_addr;
	txd->gni_desc.remote_mem_hndl = mdh;
	txd->gni_desc.length = fab_req->amo.len;
	txd->gni_desc.rdma_mode = 0; /* check flags */
	txd->gni_desc.src_cq_hndl = nic->tx_cq; /* check flags */

	txd->gni_desc.amo_cmd = _gnix_atomic_cmd(fab_req->amo.datatype,
						 fab_req->amo.op,
						 fab_req->type);
	txd->gni_desc.first_operand = fab_req->amo.first_operand;
	txd->gni_desc.second_operand = fab_req->amo.second_operand;

	GNIX_DEBUG(FI_LOG_EP_DATA, "fo:%016lx so:%016lx\n",
		   txd->gni_desc.first_operand, txd->gni_desc.second_operand);
	GNIX_DEBUG(FI_LOG_EP_DATA, "amo_cmd:%x\n",
		   txd->gni_desc.amo_cmd);
	GNIX_LOG_DUMP_TXD(txd);

	COND_ACQUIRE(nic->requires_lock, &nic->lock);

	if (OFI_UNLIKELY(inject_err)) {
		_gnix_nic_txd_err_inject(nic, txd);
		status = GNI_RC_SUCCESS;
	} else {
		status = GNI_PostFma(fab_req->vc->gni_ep, &txd->gni_desc);
	}

	COND_RELEASE(nic->requires_lock, &nic->lock);

	if (status != GNI_RC_SUCCESS) {
		_gnix_nic_tx_free(nic, txd);
		GNIX_INFO(FI_LOG_EP_DATA, "GNI_Post*() failed: %s\n",
			  gni_err_str[status]);
	}

	return gnixu_to_fi_errno(status);
}
예제 #14
0
static inline void *__gnix_generic_register(
		struct gnix_fid_domain *domain,
		struct gnix_fid_mem_desc *md,
		void *address,
		size_t length,
		gni_cq_handle_t dst_cq_hndl,
		int flags,
		int vmdh_index)
{
	struct gnix_nic *nic;
	gni_return_t grc = GNI_RC_SUCCESS;
	int rc;

	pthread_mutex_lock(&gnix_nic_list_lock);

	/* If the nic list is empty, create a nic */
	if (unlikely((dlist_empty(&gnix_nic_list_ptag[domain->ptag])))) {
		/* release the lock because we are not checking the list after
			this point. Additionally, gnix_nic_alloc takes the 
			lock to add the nic. */
		pthread_mutex_unlock(&gnix_nic_list_lock);

		rc = gnix_nic_alloc(domain, NULL, &nic);
		if (rc) {
			GNIX_INFO(FI_LOG_MR,
				  "could not allocate nic to do mr_reg,"
				  " ret=%i\n", rc);
			return NULL;
		}
	} else {
		nic = dlist_first_entry(&gnix_nic_list_ptag[domain->ptag], 
			struct gnix_nic, ptag_nic_list);
		if (unlikely(nic == NULL)) {
			GNIX_ERR(FI_LOG_MR, "Failed to find nic on "
				"ptag list\n");
			pthread_mutex_unlock(&gnix_nic_list_lock);
			return NULL;
		}
		_gnix_ref_get(nic);	
		pthread_mutex_unlock(&gnix_nic_list_lock);
        }

	COND_ACQUIRE(nic->requires_lock, &nic->lock);
	grc = GNI_MemRegister(nic->gni_nic_hndl, (uint64_t) address,
				  length,	dst_cq_hndl, flags,
				  vmdh_index, &md->mem_hndl);
	COND_RELEASE(nic->requires_lock, &nic->lock);

	if (unlikely(grc != GNI_RC_SUCCESS)) {
		GNIX_INFO(FI_LOG_MR, "failed to register memory with uGNI, "
			  "ret=%s\n", gni_err_str[grc]);
		_gnix_ref_put(nic);

		return NULL;
	}

	/* set up the mem desc */
	md->nic = nic;
	md->domain = domain;

	/* take references on domain */
	_gnix_ref_get(md->domain);

	return md;
}