Exemple #1
0
/*
 * Allocate and initialise an MLX endpoint bound to `domain`.
 *
 * The endpoint is zero-allocated, registered with the utility layer through
 * ofi_endpoint_init() (with mlx_ep_progress as its progress function) and
 * given its own multi-thread-mode UCP worker created on the domain's UCP
 * context.
 *
 * domain  - domain fid embedded in a struct mlx_domain
 * info    - endpoint attributes; info->mode is copied into the endpoint flags
 * fid     - out: receives the new endpoint fid; written only on success
 * context - opaque user context forwarded to ofi_endpoint_init()
 *
 * Returns FI_SUCCESS on success, -ENOMEM if the endpoint cannot be
 * allocated, the ofi_endpoint_init() error code, or the translated UCS
 * status when the worker cannot be created.
 */
int mlx_ep_open( struct fid_domain *domain, struct fi_info *info,
		struct fid_ep **fid, void *context)
{
	struct mlx_domain *mlx_dom;
	struct mlx_ep *new_ep;
	ucp_worker_params_t params;
	ucs_status_t ucs_ret = UCS_OK;
	int ret = FI_SUCCESS;

	/* Only the thread-mode field of the worker parameters is populated. */
	params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
	params.thread_mode = UCS_THREAD_MODE_MULTI;

	mlx_dom = container_of(domain, struct mlx_domain, u_domain.domain_fid);

	new_ep = calloc(1, sizeof(*new_ep));
	if (!new_ep)
		return -ENOMEM;

	ret = ofi_endpoint_init(domain, &mlx_util_prov, info,
				&new_ep->ep, context, mlx_ep_progress);
	if (ret) {
		free(new_ep);
		return ret;
	}

	ucs_ret = ucp_worker_create(mlx_dom->context, &params,
				    &new_ep->worker);
	if (ucs_ret != UCS_OK) {
		ret = MLX_TRANSLATE_ERRCODE(ucs_ret);
		/* Drop one reference on the domain before discarding the endpoint. */
		ofi_atomic_dec32(&mlx_dom->u_domain.ref);
		free(new_ep);
		return ret;
	}

	/* Install the provider's operation tables. */
	new_ep->ep.ep_fid.fid.ops = &mlx_fi_ops;
	new_ep->ep.ep_fid.ops = &mlx_ep_ops;
	new_ep->ep.ep_fid.cm = &mlx_cm_ops;
	new_ep->ep.ep_fid.tagged = &mlx_tagged_ops;

	new_ep->ep.flags = info->mode;
	new_ep->ep.caps = mlx_dom->u_domain.info_domain_caps;

	*fid = &new_ep->ep.ep_fid;
	return FI_SUCCESS;
}
/*
 * Completion callback for send requests.
 *
 * request - the completed request; treated as a struct mlx_request
 * status  - completion status of the operation
 *
 * A cancelled request (UCS_ERR_CANCELED) is released without producing a
 * completion. Otherwise, under cq->cq_lock, the request's prepared tagged
 * completion is copied into the tail slot of the CQ ring and committed.
 * For any status other than UCS_OK the entry is flagged with
 * UTIL_FLAG_ERROR and an error entry carrying the raw status (prov_errno)
 * and its translated code (err) is appended to cq->err_list.
 *
 * The request is always marked MLX_FI_REQ_UNINITIALIZED and released, and
 * the CQ lock is always dropped, before returning.
 */
void mlx_send_callback( void *request,
			ucs_status_t status)
{
	struct util_cq *cq;
	struct mlx_request *mlx_req = request;
	struct fi_cq_tagged_entry *t_entry;
	struct util_cq_err_entry *err;

	cq = mlx_req->cq;

	if (status == UCS_ERR_CANCELED) {
		ucp_request_release(request);
		return;
	}

	fastlock_acquire(&cq->cq_lock);

	t_entry = cirque_tail(cq->cirq);
	*t_entry = (mlx_req->completion.tagged);
	cirque_commit(cq->cirq);

	if (status != UCS_OK){
		t_entry->flags |= UTIL_FLAG_ERROR;
		err = calloc(1, sizeof(*err));
		if (!err) {
			FI_WARN(&mlx_prov, FI_LOG_CQ,
				"out of memory, cannot report CQ error\n");
			/*
			 * The detailed error entry cannot be queued, but the
			 * lock must still be dropped and the request returned;
			 * bailing out here used to leave cq_lock held.
			 */
			goto out;
		}

		err->err_entry = (mlx_req->completion.error);
		err->err_entry.prov_errno = (int)status;
		err->err_entry.err = MLX_TRANSLATE_ERRCODE(status);
		err->err_entry.olen = 0;
		slist_insert_tail(&err->list_entry, &cq->err_list);
	}

out:
	mlx_req->type = MLX_FI_REQ_UNINITIALIZED;

	fastlock_release(&cq->cq_lock);
	ucp_request_release(request);
}
/*
 * Completion callback for tagged receive requests.
 *
 * request - the completed request; treated as a struct mlx_request
 * status  - completion status of the receive
 * info    - sender tag and length of the received message
 *
 * A cancelled request (UCS_ERR_CANCELED) is released and nothing else is
 * done. Otherwise the sender tag and length are stored in the request's
 * tagged completion, and for a failed status the raw and translated error
 * codes are stored in its error completion.
 *
 * If the request is still MLX_FI_REQ_UNINITIALIZED it is only re-typed as
 * MLX_FI_REQ_UNEXPECTED (or MLX_FI_REQ_UNEXPECTED_ERR on failure); no CQ
 * entry is written and the request is not released here.
 *
 * Otherwise the completion is written to the CQ ring tail, an error entry
 * is queued on failure, the matching cq->src slot (if any) is set to
 * FI_ADDR_NOTAVAIL, the wait object (if any) is signalled, the entry is
 * committed and the request is reset and released.
 *
 * NOTE(review): cq->cq_lock is released at the end of this function but no
 * matching fastlock_acquire() is visible here - presumably the lock is
 * taken by the code path that leads to this callback; confirm.
 */
void mlx_recv_callback (
			void *request,
			ucs_status_t status,
			ucp_tag_recv_info_t *info)
{
	struct util_cq *cq;
	struct mlx_request *mlx_req;

	mlx_req = (struct mlx_request*)request;
	if (status == UCS_ERR_CANCELED) {
		ucp_request_release(request);
		return;
	}

	cq = mlx_req->cq;

	mlx_req->completion.tagged.tag = info->sender_tag;
	mlx_req->completion.tagged.len = info->length;

	if (status != UCS_OK) {
		mlx_req->completion.error.prov_errno = (int)status;
		mlx_req->completion.error.err = MLX_TRANSLATE_ERRCODE(status);
	}

	if (mlx_req->type == MLX_FI_REQ_UNINITIALIZED) {
		/* Only record the outcome in the request's type field. */
		if (status != UCS_OK) {
			mlx_req->completion.error.olen = info->length;
			mlx_req->type = MLX_FI_REQ_UNEXPECTED_ERR;
		} else {
			mlx_req->type = MLX_FI_REQ_UNEXPECTED;
		}
	} else {
		if (status != UCS_OK) {
			mlx_req->completion.error.olen = info->length -
						mlx_req->completion.error.len;
		}

		struct fi_cq_tagged_entry *t_entry;
		t_entry = cirque_tail(cq->cirq);
		*t_entry = (mlx_req->completion.tagged);

		if (status != UCS_OK) {
			struct util_cq_err_entry* err;
			t_entry->flags |= UTIL_FLAG_ERROR;

			err = calloc(1, sizeof(struct util_cq_err_entry));
			if (!err) {
				FI_WARN(&mlx_prov, FI_LOG_CQ,
					"out of memory, cannot report CQ error\n");
				/*
				 * Leave through the common exit so the CQ lock
				 * is released like on every other path; the
				 * entry is not committed.
				 */
				goto unlock;
			}

			err->err_entry = (mlx_req->completion.error);
			slist_insert_tail(&err->list_entry, &cq->err_list);
		}

		if (cq->src){
			cq->src[cirque_windex((struct mlx_comp_cirq*)(cq->cirq))] =
					FI_ADDR_NOTAVAIL;
		}

		if (cq->wait) {
			cq->wait->signal(cq->wait);
		}

		mlx_req->type = MLX_FI_REQ_UNINITIALIZED;
		cirque_commit(cq->cirq);
		ucp_request_release(request);
	}
unlock:
	fastlock_release(&cq->cq_lock);
}
Exemple #4
0
/*
 * Populate the UCS-status to FI-error translation accessed through
 * MLX_TRANSLATE_ERRCODE().
 *
 * Every listed UCS status is assigned the negated FI_* error code (plain
 * -EEXIST for UCS_ERR_ALREADY_EXISTS). Several UCS codes share the same
 * target, e.g. -FI_EINVAL and -FI_ENOSYS.
 *
 * Always returns 0.
 */
static int mlx_init_errcodes(void)
{
	/* Success and in-progress states. */
	MLX_TRANSLATE_ERRCODE (UCS_OK)                  = -FI_SUCCESS;
	MLX_TRANSLATE_ERRCODE (UCS_INPROGRESS)          = -FI_EINPROGRESS;

	/* Failure codes. */
	MLX_TRANSLATE_ERRCODE (UCS_ERR_NO_MESSAGE)      = -FI_ENOMSG;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_NO_RESOURCE)     = -FI_EINVAL;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_IO_ERROR)        = -FI_EIO;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_NO_MEMORY)       = -FI_ENOMEM;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_INVALID_PARAM)   = -FI_EINVAL;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_UNREACHABLE)     = -FI_ENETUNREACH;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_INVALID_ADDR)    = -FI_EINVAL;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_NOT_IMPLEMENTED) = -FI_ENOSYS;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_MESSAGE_TRUNCATED) = -FI_EMSGSIZE;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_NO_PROGRESS)     = -FI_EAGAIN;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_BUFFER_TOO_SMALL)= -FI_ETOOSMALL;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_NO_ELEM)         = -FI_ENOENT;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_SOME_CONNECTS_FAILED)   = -FI_EIO;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_NO_DEVICE)       = -FI_ENODEV;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_BUSY)            = -FI_EBUSY;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_CANCELED)        = -FI_ECANCELED;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_SHMEM_SEGMENT)   = -FI_EINVAL;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_ALREADY_EXISTS)  = -EEXIST;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_OUT_OF_RANGE)    = -FI_EINVAL;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_TIMED_OUT)       = -FI_ETIMEDOUT;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_EXCEEDS_LIMIT)   = -FI_E2BIG;
	MLX_TRANSLATE_ERRCODE (UCS_ERR_UNSUPPORTED)     = -FI_ENOSYS;
	return 0;
}