Example no. 1
/* Post a REG_MR Work Request to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	    int nsegs, bool writing, struct rpcrdma_mr **out)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
	struct rpcrdma_frwr *frwr;
	struct rpcrdma_mr *mr;
	struct ib_mr *ibmr;
	struct ib_reg_wr *reg_wr;
	struct ib_send_wr *bad_wr;
	int rc, i, n;
	u8 key;

	mr = NULL;
	do {
		if (mr)
			rpcrdma_mr_defer_recovery(mr);
		mr = rpcrdma_mr_get(r_xprt);
		if (!mr)
			return ERR_PTR(-ENOBUFS);
	} while (mr->frwr.fr_state != FRWR_IS_INVALID);
	frwr = &mr->frwr;
	frwr->fr_state = FRWR_IS_VALID;

	if (nsegs > ia->ri_max_frwr_depth)
		nsegs = ia->ri_max_frwr_depth;
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mr->mr_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
				   seg->mr_len);

		++seg;
		++i;
		if (holes_ok)
			continue;
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mr->mr_dir = rpcrdma_data_dir(writing);

	mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
	if (!mr->mr_nents)
		goto out_dmamap_err;

	ibmr = frwr->fr_mr;
	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
	if (unlikely(n != mr->mr_nents))
		goto out_mapmr_err;

	key = (u8)(ibmr->rkey & 0x000000FF);
	ib_update_fast_reg_key(ibmr, ++key);

	reg_wr = &frwr->fr_regwr;
	reg_wr->wr.next = NULL;
	reg_wr->wr.opcode = IB_WR_REG_MR;
	frwr->fr_cqe.done = frwr_wc_fastreg;
	reg_wr->wr.wr_cqe = &frwr->fr_cqe;
	reg_wr->wr.num_sge = 0;
	reg_wr->wr.send_flags = 0;
	reg_wr->mr = ibmr;
	reg_wr->key = ibmr->rkey;
	reg_wr->access = writing ?
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
	if (rc)
		goto out_senderr;

	mr->mr_handle = ibmr->rkey;
	mr->mr_length = ibmr->length;
	mr->mr_offset = ibmr->iova;

	*out = mr;
	return seg;

out_dmamap_err:
	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
	       mr->mr_sg, i);
	frwr->fr_state = FRWR_IS_INVALID;
	rpcrdma_mr_put(mr);
	return ERR_PTR(-EIO);

out_mapmr_err:
	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
	       frwr->fr_mr, n, mr->mr_nents);
	rpcrdma_mr_defer_recovery(mr);
	return ERR_PTR(-EIO);

out_senderr:
	pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
	rpcrdma_mr_defer_recovery(mr);
	return ERR_PTR(-ENOTCONN);
}
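
Example 1 above follows the standard FRWR sequence: DMA-map the segment list, fold it into the MR with ib_map_mr_sg(), advance the rkey's low byte so stale keys are rejected, then post an IB_WR_REG_MR work request. Below is a minimal sketch of just that core path, with illustrative names and none of the transport bookkeeping; it assumes the scatterlist is already DMA-mapped and uses the same pre-4.19 ib_post_send() signature as the example.

/* Minimal FRWR registration sketch (illustrative names; assumes sg is
 * already DMA-mapped). Condenses the core steps of the example above. */
static int frwr_register_sketch(struct ib_qp *qp, struct ib_mr *mr,
				struct scatterlist *sg, int nents,
				bool writing)
{
	struct ib_reg_wr reg_wr;
	struct ib_send_wr *bad_wr;
	int n;

	n = ib_map_mr_sg(mr, sg, nents, NULL, PAGE_SIZE);
	if (n != nents)
		return -EIO;

	/* Bump the low byte of the key so a stale rkey is rejected. */
	ib_update_fast_reg_key(mr, (u8)(mr->rkey & 0xFF) + 1);

	memset(&reg_wr, 0, sizeof(reg_wr));
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = writing ?
			IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			IB_ACCESS_REMOTE_READ;

	return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}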
Example no. 2
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_send_wr wr, *bad_wr;
	struct ib_sge sge;
	int err = 0;
	unsigned long flags;
	struct p9_rdma_context *c = NULL;
	struct p9_rdma_context *rpl_context = NULL;

	/* When an error occurs between posting the recv and the send,
	 * there will be a receive context posted without a pending request.
	 * Since there is no way to "un-post" it, we remember it and skip
	 * post_recv() for the next request.
	 * So here, see if we are this "next request" and need to absorb an
	 * excess receive context. If so, drop and free our own and do not
	 * call post_recv().
	 */
	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
		if (atomic_sub_return(1, &rdma->excess_rc) >= 0) {
			/* Got one! */
			kfree(req->rc);
			req->rc = NULL;
			goto dont_need_post_recv;
		} else {
			/* We raced and lost. */
			atomic_inc(&rdma->excess_rc);
		}
	}

	/* Allocate an fcall for the reply */
	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
	if (!rpl_context) {
		err = -ENOMEM;
		goto recv_error;
	}
	rpl_context->rc = req->rc;

	/*
	 * Post a receive buffer for this request. We need to ensure
	 * there is a reply buffer available for every outstanding
	 * request. A flushed request can result in no reply for an
	 * outstanding request, so we must keep a count to avoid
	 * overflowing the RQ.
	 */
	if (down_interruptible(&rdma->rq_sem)) {
		err = -EINTR;
		goto recv_error;
	}

	err = post_recv(client, rpl_context);
	if (err) {
		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
		goto recv_error;
	}
	/* remove posted receive buffer from request structure */
	req->rc = NULL;

dont_need_post_recv:
	/* Post the request */
	c = kmalloc(sizeof *c, GFP_NOFS);
	if (!c) {
		err = -ENOMEM;
		goto send_error;
	}
	c->req = req;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->req->tc->sdata, c->req->tc->size,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
		err = -EIO;
		goto send_error;
	}

	sge.addr = c->busa;
	sge.length = c->req->tc->size;
	sge.lkey = rdma->pd->local_dma_lkey;

	wr.next = NULL;
	c->wc_op = IB_WC_SEND;
	wr.wr_id = (unsigned long) c;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	if (down_interruptible(&rdma->sq_sem)) {
		err = -EINTR;
		goto send_error;
	}

	/* Mark request as `sent' *before* we actually send it,
	 * because doing it afterwards could erase the REQ_STATUS_RCVD
	 * status in case of a very fast reply.
	 */
	req->status = REQ_STATUS_SENT;
	err = ib_post_send(rdma->qp, &wr, &bad_wr);
	if (err)
		goto send_error;

	/* Success */
	return 0;

 /* Handle errors that happened during the send or while preparing it: */
 send_error:
	req->status = REQ_STATUS_ERROR;
	kfree(c);
	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);

	/* Ach.
	 * We did post_recv(), but not the send. We have one receive
	 * posted in excess.
	 */
	atomic_inc(&rdma->excess_rc);
	return err;

 /* Handle errors that happened during post_recv() or while preparing it: */
 recv_error:
	kfree(rpl_context);
	spin_lock_irqsave(&rdma->req_lock, flags);
	if (rdma->state < P9_RDMA_CLOSING) {
		rdma->state = P9_RDMA_CLOSING;
		spin_unlock_irqrestore(&rdma->req_lock, flags);
		rdma_disconnect(rdma->cm_id);
	} else
		spin_unlock_irqrestore(&rdma->req_lock, flags);
	return err;
}
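
The two down_interruptible() calls above are the posting half of a flow-control scheme: sq_sem and rq_sem are counting semaphores sized to the send and receive queue depths, and each completion returns its slot with up(). Below is a sketch of that completion half, with hypothetical handler names (the real handlers also unmap buffers and dispatch the reply to the client).

/* Sketch of the completion side assumed above: each completion frees one
 * queue slot by upping the matching semaphore. Handler names are
 * hypothetical; error handling and buffer teardown are elided. */
static void send_done_sketch(struct ib_cq *cq, struct ib_wc *wc)
{
	struct p9_trans_rdma *rdma = cq->cq_context;

	up(&rdma->sq_sem);		/* one more SQ slot available */
}

static void recv_done_sketch(struct ib_cq *cq, struct ib_wc *wc)
{
	struct p9_trans_rdma *rdma = cq->cq_context;

	up(&rdma->rq_sem);		/* one more RQ slot available */
}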
Example no. 3
static void bw_test(struct krping_cb *cb)
{
	int ccnt, scnt, rcnt;
	int iters = cb->count;
	struct timeval start_tv, stop_tv;
	cycles_t *post_cycles_start, *post_cycles_stop;
	cycles_t *poll_cycles_start, *poll_cycles_stop;
	cycles_t *last_poll_cycles_start;
	cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0;
	int i;
	int cycle_iters = 1000;
	int err;

	ccnt = 0;
	scnt = 0;
	rcnt = 0;

	err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop,
				&poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start);
	if (err) {
		log(LOG_ERR, "%s kmalloc failed\n", __FUNCTION__);
		return;
	}

	cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
	cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
	cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
	cb->rdma_sq_wr.sg_list->length = cb->size;

	if (cycle_iters > iters)
		cycle_iters = iters;
	microtime(&start_tv);
	while (scnt < iters || ccnt < iters) {

		while (scnt < iters && scnt - ccnt < cb->txdepth) {
			struct ib_send_wr *bad_wr;

			if (scnt < cycle_iters)
				post_cycles_start[scnt] = get_cycles();
			if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
				log(LOG_ERR,  "Couldn't post send: scnt=%d\n",
					scnt);
				goto out_free;
			}
			if (scnt < cycle_iters)
				post_cycles_stop[scnt] = get_cycles();
			++scnt;
		}

		if (ccnt < iters) {
			int ne;
			struct ib_wc wc;

			if (ccnt < cycle_iters)
				poll_cycles_start[ccnt] = get_cycles();
			do {
				if (ccnt < cycle_iters)
					last_poll_cycles_start[ccnt] = get_cycles();
				ne = ib_poll_cq(cb->cq, 1, &wc);
			} while (ne == 0);
			if (ccnt < cycle_iters)
				poll_cycles_stop[ccnt] = get_cycles();
			ccnt += 1;

			if (ne < 0) {
				log(LOG_ERR, "poll CQ failed %d\n", ne);
				goto out_free;
			}
			if (wc.status != IB_WC_SUCCESS) {
				log(LOG_ERR, "Completion with error at %s:\n",
					cb->server ? "server" : "client");
				log(LOG_ERR, "Failed status %d: wr_id %d\n",
					wc.status, (int) wc.wr_id);
				goto out_free;
			}
		}
	}
	microtime(&stop_tv);

	if (stop_tv.tv_usec < start_tv.tv_usec) {
		stop_tv.tv_usec += 1000000;
		stop_tv.tv_sec  -= 1;
	}

	for (i=0; i < cycle_iters; i++) {
		sum_post += post_cycles_stop[i] - post_cycles_start[i];
		sum_poll += poll_cycles_stop[i] - poll_cycles_start[i];
		sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i];
	}

	log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n",
		stop_tv.tv_sec - start_tv.tv_sec, 
		stop_tv.tv_usec - start_tv.tv_usec,
		scnt, cb->size, cycle_iters, 
		(unsigned long long)sum_post, (unsigned long long)sum_poll, 
		(unsigned long long)sum_last_poll);

out_free:
	free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start,
			poll_cycles_stop, last_poll_cycles_start);
}
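
The log line at the end gives everything needed to derive throughput. Below is a small helper sketch (illustrative, not part of krping) showing the arithmetic on the values bw_test() prints.

/* Sketch: bandwidth in MB/s from bw_test()'s logged numbers. Since
 * 10^6 bytes per 10^6 usec is 1 MB/s, bytes-per-usec is the answer. */
static unsigned long long bw_mb_per_sec(time_t delta_sec, long delta_usec,
					int iters, int size)
{
	unsigned long long usec = (unsigned long long)delta_sec * 1000000 +
				  delta_usec;

	if (usec == 0)
		return 0;
	return (unsigned long long)iters * size / usec;
}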
Example no. 4
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_send_wr wr, *bad_wr;
	struct ib_sge sge;
	int err = 0;
	unsigned long flags;
	struct p9_rdma_context *c = NULL;
	struct p9_rdma_context *rpl_context = NULL;

	/* Allocate an fcall for the reply */
	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
	if (!rpl_context) {
		err = -ENOMEM;
		goto err_close;
	}

	/*
	 * If the request has a buffer, steal it, otherwise
	 * allocate a new one.  Typically, requests should already
	 * have receive buffers allocated and just swap them around
	 */
	if (!req->rc) {
		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
				  GFP_NOFS);
		if (req->rc) {
			req->rc->sdata = (char *) req->rc +
						sizeof(struct p9_fcall);
			req->rc->capacity = client->msize;
		}
	}
	rpl_context->rc = req->rc;
	if (!rpl_context->rc) {
		err = -ENOMEM;
		goto err_free2;
	}

	/*
	 * Post a receive buffer for this request. We need to ensure
	 * there is a reply buffer available for every outstanding
	 * request. A flushed request can result in no reply for an
	 * outstanding request, so we must keep a count to avoid
	 * overflowing the RQ.
	 */
	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
		err = post_recv(client, rpl_context);
		if (err)
			goto err_free1;
	} else
		atomic_dec(&rdma->rq_count);

	/* remove posted receive buffer from request structure */
	req->rc = NULL;

	/* Post the request */
	c = kmalloc(sizeof *c, GFP_NOFS);
	if (!c) {
		err = -ENOMEM;
		goto err_free1;
	}
	c->req = req;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->req->tc->sdata, c->req->tc->size,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
		goto error;

	sge.addr = c->busa;
	sge.length = c->req->tc->size;
	sge.lkey = rdma->lkey;

	wr.next = NULL;
	c->wc_op = IB_WC_SEND;
	wr.wr_id = (unsigned long) c;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	if (down_interruptible(&rdma->sq_sem))
		goto error;

	return ib_post_send(rdma->qp, &wr, &bad_wr);

 error:
	kfree(c);
	kfree(rpl_context->rc);
	kfree(rpl_context);
	p9_debug(P9_DEBUG_ERROR, "EIO\n");
	return -EIO;
 err_free1:
	kfree(rpl_context->rc);
 err_free2:
	kfree(rpl_context);
 err_close:
	spin_lock_irqsave(&rdma->req_lock, flags);
	if (rdma->state < P9_RDMA_CLOSING) {
		rdma->state = P9_RDMA_CLOSING;
		spin_unlock_irqrestore(&rdma->req_lock, flags);
		rdma_disconnect(rdma->cm_id);
	} else
		spin_unlock_irqrestore(&rdma->req_lock, flags);
	return err;
}
Example no. 5
static void rlat_test(struct krping_cb *cb)
{
	int scnt;
	int iters = cb->count;
	struct timeval start_tv, stop_tv;
	int ret;
	struct ib_wc wc;
	struct ib_send_wr *bad_wr;
	int ne;

	scnt = 0;
	cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ;
	cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
	cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
	cb->rdma_sq_wr.sg_list->length = cb->size;

	microtime(&start_tv);
	if (!cb->poll) {
		cb->state = RDMA_READ_ADV;
		ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
	}
	while (scnt < iters) {

		cb->state = RDMA_READ_ADV;
		ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR,  
				"Couldn't post send: ret=%d scnt %d\n",
				ret, scnt);
			return;
		}

		do {
			if (!cb->poll) {
				krping_wait(cb, RDMA_READ_COMPLETE);
				if (cb->state == RDMA_READ_COMPLETE) {
					ne = 1;
					ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
				} else {
					ne = -1;
				}
			} else
				ne = ib_poll_cq(cb->cq, 1, &wc);
			if (cb->state == ERROR) {
				log(LOG_ERR, 
				       "state == ERROR...bailing scnt %d\n", scnt);
				return;
			}
		} while (ne == 0);

		if (ne < 0) {
			log(LOG_ERR, "poll CQ failed %d\n", ne);
			return;
		}
		if (cb->poll && wc.status != IB_WC_SUCCESS) {
			log(LOG_ERR, "Completion with error at %s:\n",
				cb->server ? "server" : "client");
			log(LOG_ERR, "Failed status %d: wr_id %d\n",
				wc.status, (int) wc.wr_id);
			return;
		}
		++scnt;
	}
	microtime(&stop_tv);

	if (stop_tv.tv_usec < start_tv.tv_usec) {
		stop_tv.tv_usec += 1000000;
		stop_tv.tv_sec  -= 1;
	}

	log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d\n",
		stop_tv.tv_sec - start_tv.tv_sec, 
		stop_tv.tv_usec - start_tv.tv_usec,
		scnt, cb->size);
}
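
Each iteration posts one signaled RDMA READ and waits for its completion, so mean round-trip latency falls out of the logged delta directly. Below is a sketch of that arithmetic (illustrative helper, not part of krping).

/* Sketch: mean per-READ latency in usec from rlat_test()'s delta. */
static unsigned long long rlat_usec_per_op(time_t delta_sec,
					   long delta_usec, int iters)
{
	unsigned long long usec = (unsigned long long)delta_sec * 1000000 +
				  delta_usec;

	return iters ? usec / iters : 0;
}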
Example no. 6
static void krping_test_server(struct krping_cb *cb)
{
	struct ib_send_wr *bad_wr;
	int ret;

	while (1) {
		/* Wait for client's Start STAG/TO/Len */
		krping_wait(cb, RDMA_READ_ADV);
		if (cb->state != RDMA_READ_ADV) {
			DEBUG_LOG(PFX "wait for RDMA_READ_ADV state %d\n",
				cb->state);
			break;
		}

		DEBUG_LOG(PFX "server received sink adv\n");

		/* Issue RDMA Read. */
		cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ;
		cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
		cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
		cb->rdma_sq_wr.sg_list->length = cb->remote_len;

		ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR, "post send error %d\n", ret);
			break;
		}
		DEBUG_LOG(PFX "server posted rdma read req \n");

		/* Wait for read completion */
		krping_wait(cb, RDMA_READ_COMPLETE);
		if (cb->state != RDMA_READ_COMPLETE) {
			log(LOG_ERR,  
			       "wait for RDMA_READ_COMPLETE state %d\n",
			       cb->state);
			break;
		}
		DEBUG_LOG(PFX "server received read complete\n");

		/* Display data in recv buf */
		if (cb->verbose)
			DEBUG_LOG("server ping data: %s\n", cb->rdma_buf);

		/* Tell client to continue */
		ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR, "post send error %d\n", ret);
			break;
		}
		DEBUG_LOG(PFX "server posted go ahead\n");

		/* Wait for client's RDMA STAG/TO/Len */
		krping_wait(cb, RDMA_WRITE_ADV);
		if (cb->state != RDMA_WRITE_ADV) {
			log(LOG_ERR,  
			       "wait for RDMA_WRITE_ADV state %d\n",
			       cb->state);
			break;
		}
		DEBUG_LOG(PFX "server received sink adv\n");

		/* RDMA Write echo data */
		cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
		cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
		cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
		cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1;
		DEBUG_LOG(PFX "rdma write from lkey %x laddr %llx len %d\n",
			  cb->rdma_sq_wr.sg_list->lkey,
			  (unsigned long long)cb->rdma_sq_wr.sg_list->addr,
			  cb->rdma_sq_wr.sg_list->length);

		ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR, "post send error %d\n", ret);
			break;
		}

		/* Wait for completion */
		krping_wait(cb, RDMA_WRITE_COMPLETE);
		if (cb->state != RDMA_WRITE_COMPLETE) {
			log(LOG_ERR,  
			       "wait for RDMA_WRITE_COMPLETE state %d\n",
			       cb->state);
			break;
		}
		DEBUG_LOG(PFX "server rdma write complete \n");

		cb->state = CONNECTED;

		/* Tell client to begin again */
		ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR, "post send error %d\n", ret);
			break;
		}
		DEBUG_LOG(PFX "server posted go ahead\n");
	}
}
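
The krping examples use the pre-4.4 work-request layout, where the remote rkey and address live in the wr.rdma union of ib_send_wr. On kernels that replaced that union with struct ib_rdma_wr, the equivalent RDMA READ post looks like the following sketch (illustrative function, same semantics, still using the pre-4.19 non-const bad_wr signature).

/* Sketch: posting the same RDMA READ with struct ib_rdma_wr, the
 * replacement for the wr.rdma union used in the examples above. */
static int post_rdma_read_sketch(struct ib_qp *qp, struct ib_sge *sge,
				 u32 rkey, u64 remote_addr)
{
	struct ib_rdma_wr rdma_wr;
	struct ib_send_wr *bad_wr;

	memset(&rdma_wr, 0, sizeof(rdma_wr));
	rdma_wr.wr.opcode = IB_WR_RDMA_READ;
	rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
	rdma_wr.wr.sg_list = sge;
	rdma_wr.wr.num_sge = 1;
	rdma_wr.rkey = rkey;
	rdma_wr.remote_addr = remote_addr;

	return ib_post_send(qp, &rdma_wr.wr, &bad_wr);
}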
Example no. 7
static void krping_rlat_test_client(struct krping_cb *cb)
{
	struct ib_send_wr *bad_wr;
	struct ib_wc wc;
	int ret;

	cb->state = RDMA_READ_ADV;

	/* Send STAG/TO/Len to the server */
	if (cb->dma_mr)
		krping_format_send(cb, cb->start_addr, cb->dma_mr);
	else
		krping_format_send(cb, cb->start_addr, cb->rdma_mr);
	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
	if (ret) {
		log(LOG_ERR, "post send error %d\n", ret);
		return;
	}

	/* Spin waiting for send completion */
	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
		;
	if (ret < 0) {
		log(LOG_ERR, "poll error %d\n", ret);
		return;
	}
	if (wc.status) {
		log(LOG_ERR, "send completion error %d\n", wc.status);
		return;
	}

	/* Spin waiting for server's Start STAG/TO/Len */
	while (cb->state < RDMA_WRITE_ADV) {
		krping_cq_event_handler(cb->cq, cb);
	}

#if 0
{
	int i;
	struct timeval start, stop;
	time_t sec;
	suseconds_t usec;
	unsigned long long elapsed;
	struct ib_wc wc;
	struct ib_send_wr *bad_wr;
	int ne;
	
	cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
	cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
	cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
	cb->rdma_sq_wr.sg_list->length = 0;
	cb->rdma_sq_wr.num_sge = 0;

	microtime(&start);
	for (i=0; i < 100000; i++) {
		if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
			log(LOG_ERR,  "Couldn't post send\n");
			return;
		}
		do {
			ne = ib_poll_cq(cb->cq, 1, &wc);
		} while (ne == 0);
		if (ne < 0) {
			log(LOG_ERR, "poll CQ failed %d\n", ne);
			return;
		}
		if (wc.status != IB_WC_SUCCESS) {
			log(LOG_ERR, "Completion wth error at %s:\n",
				cb->server ? "server" : "client");
			log(LOG_ERR, "Failed status %d: wr_id %d\n",
				wc.status, (int) wc.wr_id);
			return;
		}
	}
	microtime(&stop);
	
	if (stop.tv_usec < start.tv_usec) {
		stop.tv_usec += 1000000;
		stop.tv_sec  -= 1;
	}
	sec     = stop.tv_sec - start.tv_sec;
	usec    = stop.tv_usec - start.tv_usec;
	elapsed = sec * 1000000 + usec;
	log(LOG_ERR, "0B-write-lat iters 100000 usec %llu\n", elapsed);
}
#endif

	rlat_test(cb);
}
Example no. 8
static void krping_test_client(struct krping_cb *cb)
{
	int ping, start, cc, i, ret;
	struct ib_send_wr *bad_wr;
	unsigned char c;

	start = 65;	/* ASCII 'A' */
	for (ping = 0; !cb->count || ping < cb->count; ping++) {
		cb->state = RDMA_READ_ADV;

		/* Put some ascii text in the buffer. */
		cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping);
		for (i = cc, c = start; i < cb->size; i++) {
			cb->start_buf[i] = c;
			c++;
			if (c > 122)	/* past ASCII 'z' ... */
				c = 65;	/* ... wrap back to 'A' */
		}
		start++;
		if (start > 122)
			start = 65;
		cb->start_buf[cb->size - 1] = 0;

		if (cb->dma_mr)
			krping_format_send(cb, cb->start_addr, cb->dma_mr);
		else
			krping_format_send(cb, cb->start_addr, cb->start_mr);

		ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR, "post send error %d\n", ret);
			break;
		}

		/* Wait for server to ACK */
		krping_wait(cb, RDMA_WRITE_ADV);
		if (cb->state != RDMA_WRITE_ADV) {
			log(LOG_ERR,  
			       "wait for RDMA_WRITE_ADV state %d\n",
			       cb->state);
			break;
		}

		if (cb->dma_mr)
			krping_format_send(cb, cb->rdma_addr, cb->dma_mr);
		else
			krping_format_send(cb, cb->rdma_addr, cb->rdma_mr);

		ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR, "post send error %d\n", ret);
			break;
		}

		/* Wait for the server to say the RDMA Write is complete. */
		krping_wait(cb, RDMA_WRITE_COMPLETE);
		if (cb->state != RDMA_WRITE_COMPLETE) {
			log(LOG_ERR,  
			       "wait for RDMA_WRITE_COMPLETE state %d\n",
			       cb->state);
			break;
		}

		if (cb->validate)
			if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {
				log(LOG_ERR, "data mismatch!\n");
				break;
			}

		if (cb->verbose)
			DEBUG_LOG("ping data: %s\n", cb->rdma_buf);
	}
}
Example no. 9
void
sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h;
	unsigned long mseq;
	struct ib_device *dev;
	struct ib_send_wr *bad_wr;
	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge;
	struct ib_send_wr tx_wr = { NULL };
	int i, rc;
	u64 addr;

	if (!ssk->qp_active) {
		m_freem(mb);
		return;
	}

	mseq = ring_head(ssk->tx_ring);
	h = mtod(mb, struct sdp_bsdh *);
	/* Stats must be taken after h points at the BSDH header. */
	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	SDPSTATS_HIST(send_size, mb->len);
	ssk->tx_packets++;
	ssk->tx_bytes += mb->m_pkthdr.len;

#ifdef SDP_ZCOPY
	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(mb);
		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(ssk->socket, "SrcAvail cancelled "
					"before being sent!\n");
			WARN_ON(1);
			m_freem(mb);
			return;
		}
		TX_SRCAVAIL_STATE(mb)->mseq = mseq;
	}
#endif

	if (unlikely(mb->m_flags & M_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	mb->m_flags |= M_RDONLY; /* Don't allow compression once sent. */
	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(mb->m_pkthdr.len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf1(ssk->socket, mb, "TX: %s bufs: %d mseq:%ld ack:%d",
			mid2str(h->mid), rx_ring_posted(ssk), mseq,
			ntohl(h->mseq_ack));

	SDP_DUMP_PACKET(ssk->socket, "TX", mb, h);

	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->mb = mb;
	dev = ssk->ib_device;
	sge = &ibsge[0];
	for (i = 0;  mb != NULL; i++, mb = mb->m_next, sge++) {
		addr = ib_dma_map_single(dev, mb->m_data, mb->m_len,
		    DMA_TO_DEVICE);
		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));
		BUG_ON(i >= SDP_MAX_SEND_SGES);
		tx_req->mapping[i] = addr;
		sge->addr = addr;
		sge->length = mb->m_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
	}
	tx_wr.next = NULL;
	tx_wr.wr_id = mseq | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = i;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = IB_SEND_SIGNALED;
	if (unlikely(tx_req->mb->m_flags & M_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(ssk->socket,
				"ib_post_send failed with status %d.\n", rc);

		sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE);

		sdp_notify(ssk, ECONNRESET);
		m_freem(tx_req->mb);
		return;
	}

	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));

	return;
}
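
The error path above relies on sdp_cleanup_sdp_buf() to undo the per-fragment DMA mappings recorded in tx_req->mapping[]. Below is a sketch of what that unmap walk presumably looks like (illustrative name and shape; the real helper may differ).

/* Sketch: undo the ib_dma_map_single() calls made while building the
 * SGE list, walking the saved mbuf chain in the same order. */
static void sdp_unmap_tx_sketch(struct ib_device *dev,
				struct sdp_buf *tx_req)
{
	struct mbuf *mb;
	int i;

	for (i = 0, mb = tx_req->mb; mb != NULL; i++, mb = mb->m_next)
		ib_dma_unmap_single(dev, tx_req->mapping[i], mb->m_len,
				    DMA_TO_DEVICE);
}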