/* Post a REG_MR Work Request to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 *
 * Registers up to ia->ri_max_frwr_depth segments starting at @seg.
 * On success, returns a pointer to the first segment NOT registered
 * and stores the registered MR in @out. On failure, returns an
 * ERR_PTR:
 *   -ENOBUFS  no free MR could be obtained
 *   -EIO      DMA mapping or fast-register mapping failed
 *   -ENOTCONN posting the REG_MR WR failed
 */
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	    int nsegs, bool writing, struct rpcrdma_mr **out)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
	struct rpcrdma_frwr *frwr;
	struct rpcrdma_mr *mr;
	struct ib_mr *ibmr;
	struct ib_reg_wr *reg_wr;
	struct ib_send_wr *bad_wr;
	int rc, i, n;
	u8 key;

	/* Grab an MR that is not still being invalidated; any MR we
	 * skip over is handed to deferred recovery. */
	mr = NULL;
	do {
		if (mr)
			rpcrdma_mr_defer_recovery(mr);
		mr = rpcrdma_mr_get(r_xprt);
		if (!mr)
			return ERR_PTR(-ENOBUFS);
	} while (mr->frwr.fr_state != FRWR_IS_INVALID);
	frwr = &mr->frwr;
	frwr->fr_state = FRWR_IS_VALID;

	if (nsegs > ia->ri_max_frwr_depth)
		nsegs = ia->ri_max_frwr_depth;
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mr->mr_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
				   seg->mr_len);

		++seg;
		++i;
		if (holes_ok)
			continue;
		/* Unless the device tolerates gaps in the SG list,
		 * stop at the first page-interior boundary. */
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mr->mr_dir = rpcrdma_data_dir(writing);

	mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
	if (!mr->mr_nents)
		goto out_dmamap_err;

	ibmr = frwr->fr_mr;
	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
	if (unlikely(n != mr->mr_nents))
		goto out_mapmr_err;

	/* Bump the key portion of the rkey so a peer cannot replay
	 * a stale handle from a previous registration. */
	key = (u8)(ibmr->rkey & 0x000000FF);
	ib_update_fast_reg_key(ibmr, ++key);

	reg_wr = &frwr->fr_regwr;
	reg_wr->wr.next = NULL;
	reg_wr->wr.opcode = IB_WR_REG_MR;
	frwr->fr_cqe.done = frwr_wc_fastreg;
	reg_wr->wr.wr_cqe = &frwr->fr_cqe;
	reg_wr->wr.num_sge = 0;
	reg_wr->wr.send_flags = 0;
	reg_wr->mr = ibmr;
	reg_wr->key = ibmr->rkey;
	reg_wr->access = writing ?
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	/* Fixed: the second argument was the mojibake "®_wr->wr"
	 * (an encoding-corrupted "&reg_wr->wr"). */
	rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
	if (rc)
		goto out_senderr;

	mr->mr_handle = ibmr->rkey;
	mr->mr_length = ibmr->length;
	mr->mr_offset = ibmr->iova;

	*out = mr;
	return seg;

out_dmamap_err:
	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
	       mr->mr_sg, i);
	frwr->fr_state = FRWR_IS_INVALID;
	rpcrdma_mr_put(mr);
	return ERR_PTR(-EIO);

out_mapmr_err:
	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
	       frwr->fr_mr, n, mr->mr_nents);
	rpcrdma_mr_defer_recovery(mr);
	return ERR_PTR(-EIO);

out_senderr:
	pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
	rpcrdma_mr_defer_recovery(mr);
	return ERR_PTR(-ENOTCONN);
}
/*
 * rdma_request - post a 9P request over the RDMA transport.
 *
 * Posts a receive buffer for the reply (unless an "excess" receive is
 * already outstanding), then maps and posts the request for send.
 *
 * Returns 0 on success or a negative errno; on a receive-side failure
 * the connection is moved toward P9_RDMA_CLOSING and disconnected.
 */
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_send_wr wr, *bad_wr;
	struct ib_sge sge;
	int err = 0;
	unsigned long flags;
	struct p9_rdma_context *c = NULL;
	struct p9_rdma_context *rpl_context = NULL;

	/* When an error occurs between posting the recv and the send,
	 * there will be a receive context posted without a pending request.
	 * Since there is no way to "un-post" it, we remember it and skip
	 * post_recv() for the next request.
	 * So here,
	 * see if we are this `next request' and need to absorb an excess rc.
	 * If yes, then drop and free our own, and do not recv_post().
	 **/
	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
			/* Got one ! */
			kfree(req->rc);
			req->rc = NULL;
			goto dont_need_post_recv;
		} else {
			/* We raced and lost. Put it back. */
			atomic_inc(&rdma->excess_rc);
		}
	}

	/* Allocate an fcall for the reply */
	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
	if (!rpl_context) {
		err = -ENOMEM;
		goto recv_error;
	}
	rpl_context->rc = req->rc;

	/*
	 * Post a receive buffer for this request. We need to ensure
	 * there is a reply buffer available for every outstanding
	 * request. A flushed request can result in no reply for an
	 * outstanding request, so we must keep a count to avoid
	 * overflowing the RQ.
	 */
	if (down_interruptible(&rdma->rq_sem)) {
		err = -EINTR;
		goto recv_error;
	}

	err = post_recv(client, rpl_context);
	if (err) {
		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
		goto recv_error;
	}
	/* remove posted receive buffer from request structure */
	req->rc = NULL;

dont_need_post_recv:
	/* Post the request */
	c = kmalloc(sizeof *c, GFP_NOFS);
	if (!c) {
		err = -ENOMEM;
		goto send_error;
	}
	c->req = req;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->req->tc->sdata, c->req->tc->size,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
		err = -EIO;
		goto send_error;
	}

	sge.addr = c->busa;
	sge.length = c->req->tc->size;
	sge.lkey = rdma->pd->local_dma_lkey;

	wr.next = NULL;
	c->wc_op = IB_WC_SEND;
	wr.wr_id = (unsigned long) c;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	/* sq_sem throttles the number of in-flight sends; it is released
	 * by the completion path (not visible in this chunk). */
	if (down_interruptible(&rdma->sq_sem)) {
		err = -EINTR;
		goto send_error;
	}

	/* Mark request as `sent' *before* we actually send it,
	 * because doing if after could erase the REQ_STATUS_RCVD
	 * status in case of a very fast reply.
	 */
	req->status = REQ_STATUS_SENT;
	err = ib_post_send(rdma->qp, &wr, &bad_wr);
	if (err)
		goto send_error;

	/* Success */
	return 0;

 /* Handle errors that happened during or while preparing the send: */
 send_error:
	req->status = REQ_STATUS_ERROR;
	kfree(c);
	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);

	/* Ach.
	 *  We did recv_post(), but not send. We have one recv_post in excess.
	 */
	atomic_inc(&rdma->excess_rc);
	return err;

 /* Handle errors that happened during or while preparing post_recv(): */
 recv_error:
	kfree(rpl_context);
	spin_lock_irqsave(&rdma->req_lock, flags);
	if (rdma->state < P9_RDMA_CLOSING) {
		rdma->state = P9_RDMA_CLOSING;
		spin_unlock_irqrestore(&rdma->req_lock, flags);
		rdma_disconnect(rdma->cm_id);
	} else
		spin_unlock_irqrestore(&rdma->req_lock, flags);
	return err;
}
static void bw_test(struct krping_cb *cb) { int ccnt, scnt, rcnt; int iters=cb->count; ccnt = 0; scnt = 0; rcnt = 0; struct timeval start_tv, stop_tv; cycles_t *post_cycles_start, *post_cycles_stop; cycles_t *poll_cycles_start, *poll_cycles_stop; cycles_t *last_poll_cycles_start; cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; int i; int cycle_iters = 1000; int err; err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop, &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start); if (err) { log(LOG_ERR, "%s kmalloc failed\n", __FUNCTION__); return; } cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = cb->size; if (cycle_iters > iters) cycle_iters = iters; microtime(&start_tv); while (scnt < iters || ccnt < iters) { while (scnt < iters && scnt - ccnt < cb->txdepth) { struct ib_send_wr *bad_wr; if (scnt < cycle_iters) post_cycles_start[scnt] = get_cycles(); if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { log(LOG_ERR, "Couldn't post send: scnt=%d\n", scnt); return; } if (scnt < cycle_iters) post_cycles_stop[scnt] = get_cycles(); ++scnt; } if (ccnt < iters) { int ne; struct ib_wc wc; if (ccnt < cycle_iters) poll_cycles_start[ccnt] = get_cycles(); do { if (ccnt < cycle_iters) last_poll_cycles_start[ccnt] = get_cycles(); ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ccnt < cycle_iters) poll_cycles_stop[ccnt] = get_cycles(); ccnt += 1; if (ne < 0) { log(LOG_ERR, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { log(LOG_ERR, "Completion wth error at %s:\n", cb->server ? 
"server" : "client"); log(LOG_ERR, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } } } microtime(&stop_tv); if (stop_tv.tv_usec < start_tv.tv_usec) { stop_tv.tv_usec += 1000000; stop_tv.tv_sec -= 1; } for (i=0; i < cycle_iters; i++) { sum_post += post_cycles_stop[i] - post_cycles_start[i]; sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i]; } log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, scnt, cb->size, cycle_iters, (unsigned long long)sum_post, (unsigned long long)sum_poll, (unsigned long long)sum_last_poll); free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start, poll_cycles_stop, last_poll_cycles_start); }
static int rdma_request(struct p9_client *client, struct p9_req_t *req) { struct p9_trans_rdma *rdma = client->trans; struct ib_send_wr wr, *bad_wr; struct ib_sge sge; int err = 0; unsigned long flags; struct p9_rdma_context *c = NULL; struct p9_rdma_context *rpl_context = NULL; /* Allocate an fcall for the reply */ rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); if (!rpl_context) { err = -ENOMEM; goto err_close; } /* * If the request has a buffer, steal it, otherwise * allocate a new one. Typically, requests should already * have receive buffers allocated and just swap them around */ if (!req->rc) { req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, GFP_NOFS); if (req->rc) { req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); req->rc->capacity = client->msize; } } rpl_context->rc = req->rc; if (!rpl_context->rc) { err = -ENOMEM; goto err_free2; } /* * Post a receive buffer for this request. We need to ensure * there is a reply buffer available for every outstanding * request. A flushed request can result in no reply for an * outstanding request, so we must keep a count to avoid * overflowing the RQ. 
*/ if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { err = post_recv(client, rpl_context); if (err) goto err_free1; } else atomic_dec(&rdma->rq_count); /* remove posted receive buffer from request structure */ req->rc = NULL; /* Post the request */ c = kmalloc(sizeof *c, GFP_NOFS); if (!c) { err = -ENOMEM; goto err_free1; } c->req = req; c->busa = ib_dma_map_single(rdma->cm_id->device, c->req->tc->sdata, c->req->tc->size, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) goto error; sge.addr = c->busa; sge.length = c->req->tc->size; sge.lkey = rdma->lkey; wr.next = NULL; c->wc_op = IB_WC_SEND; wr.wr_id = (unsigned long) c; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; wr.sg_list = &sge; wr.num_sge = 1; if (down_interruptible(&rdma->sq_sem)) goto error; return ib_post_send(rdma->qp, &wr, &bad_wr); error: kfree(c); kfree(rpl_context->rc); kfree(rpl_context); p9_debug(P9_DEBUG_ERROR, "EIO\n"); return -EIO; err_free1: kfree(rpl_context->rc); err_free2: kfree(rpl_context); err_close: spin_lock_irqsave(&rdma->req_lock, flags); if (rdma->state < P9_RDMA_CLOSING) { rdma->state = P9_RDMA_CLOSING; spin_unlock_irqrestore(&rdma->req_lock, flags); rdma_disconnect(rdma->cm_id); } else spin_unlock_irqrestore(&rdma->req_lock, flags); return err; }
/*
 * rlat_test - measure RDMA READ latency.
 *
 * Issues cb->count back-to-back RDMA READs of cb->size bytes, one at a
 * time, and logs the total elapsed wall-clock time. Completions are
 * either spun on directly (cb->poll) or waited for via the CQ event
 * handler advancing cb->state.
 */
static void rlat_test(struct krping_cb *cb)
{
	int scnt;
	int iters = cb->count;
	struct timeval start_tv, stop_tv;
	int ret;
	struct ib_wc wc;
	struct ib_send_wr *bad_wr;
	int ne;

	scnt = 0;
	cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ;
	cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
	cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
	cb->rdma_sq_wr.sg_list->length = cb->size;

	microtime(&start_tv);
	if (!cb->poll) {
		/* Event-driven mode: arm the CQ for notifications. */
		cb->state = RDMA_READ_ADV;
		ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
	}
	while (scnt < iters) {
		cb->state = RDMA_READ_ADV;
		ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR,
			    "Couldn't post send: ret=%d scnt %d\n",
			    ret, scnt);
			return;
		}
		do {
			if (!cb->poll) {
				/* Sleep until the event handler moves
				 * cb->state; re-arm on success. */
				krping_wait(cb, RDMA_READ_COMPLETE);
				if (cb->state == RDMA_READ_COMPLETE) {
					ne = 1;
					ib_req_notify_cq(cb->cq,
						IB_CQ_NEXT_COMP);
				} else {
					ne = -1;
				}
			} else
				ne = ib_poll_cq(cb->cq, 1, &wc);
			if (cb->state == ERROR) {
				log(LOG_ERR,
				    "state == ERROR...bailing scnt %d\n",
				    scnt);
				return;
			}
		} while (ne == 0);
		if (ne < 0) {
			log(LOG_ERR, "poll CQ failed %d\n", ne);
			return;
		}
		/* wc is only valid when we polled it ourselves. */
		if (cb->poll && wc.status != IB_WC_SUCCESS) {
			log(LOG_ERR, "Completion wth error at %s:\n",
			    cb->server ? "server" : "client");
			log(LOG_ERR, "Failed status %d: wr_id %d\n",
			    wc.status, (int) wc.wr_id);
			return;
		}
		++scnt;
	}
	microtime(&stop_tv);

	if (stop_tv.tv_usec < start_tv.tv_usec) {
		stop_tv.tv_usec += 1000000;
		stop_tv.tv_sec -= 1;
	}

	log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d\n",
	    stop_tv.tv_sec - start_tv.tv_sec,
	    stop_tv.tv_usec - start_tv.tv_usec,
	    scnt, cb->size);
}
/*
 * krping_test_server - server side of the krping ping/pong loop.
 *
 * Each iteration: wait for the client's source advertisement, RDMA READ
 * the ping data, ack, wait for the client's sink advertisement, RDMA
 * WRITE the data back, wait for the write to complete, then tell the
 * client to start the next cycle. Breaks out of the loop (and returns)
 * on any post error or unexpected state transition.
 */
static void krping_test_server(struct krping_cb *cb)
{
	struct ib_send_wr *bad_wr;
	int ret;

	while (1) {
		/* Wait for client's Start STAG/TO/Len */
		krping_wait(cb, RDMA_READ_ADV);
		if (cb->state != RDMA_READ_ADV) {
			DEBUG_LOG(PFX "wait for RDMA_READ_ADV state %d\n",
				cb->state);
			break;
		}

		DEBUG_LOG(PFX "server received sink adv\n");

		/* Issue RDMA Read. */
		cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ;
		cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
		cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
		cb->rdma_sq_wr.sg_list->length = cb->remote_len;

		ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR, "post send error %d\n", ret);
			break;
		}
		DEBUG_LOG(PFX "server posted rdma read req \n");

		/* Wait for read completion */
		krping_wait(cb, RDMA_READ_COMPLETE);
		if (cb->state != RDMA_READ_COMPLETE) {
			log(LOG_ERR,
			    "wait for RDMA_READ_COMPLETE state %d\n",
			    cb->state);
			break;
		}
		DEBUG_LOG(PFX "server received read complete\n");

		/* Display data in recv buf */
		if (cb->verbose)
			DEBUG_LOG("server ping data: %s\n", cb->rdma_buf);

		/* Tell client to continue */
		ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR, "post send error %d\n", ret);
			break;
		}
		DEBUG_LOG(PFX "server posted go ahead\n");

		/* Wait for client's RDMA STAG/TO/Len */
		krping_wait(cb, RDMA_WRITE_ADV);
		if (cb->state != RDMA_WRITE_ADV) {
			log(LOG_ERR,
			    "wait for RDMA_WRITE_ADV state %d\n",
			    cb->state);
			break;
		}
		DEBUG_LOG(PFX "server received sink adv\n");

		/* RDMA Write echo data */
		cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
		cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
		cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
		/* Echo only the string actually received, plus its NUL. */
		cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1;
		DEBUG_LOG(PFX "rdma write from lkey %x laddr %llx len %d\n",
			  cb->rdma_sq_wr.sg_list->lkey,
			  (unsigned long long)cb->rdma_sq_wr.sg_list->addr,
			  cb->rdma_sq_wr.sg_list->length);

		ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR, "post send error %d\n", ret);
			break;
		}

		/* Wait for completion */
		krping_wait(cb, RDMA_WRITE_COMPLETE);
		if (cb->state != RDMA_WRITE_COMPLETE) {
			log(LOG_ERR,
			    "wait for RDMA_WRITE_COMPLETE state %d\n",
			    cb->state);
			break;
		}
		DEBUG_LOG(PFX "server rdma write complete \n");

		cb->state = CONNECTED;

		/* Tell client to begin again */
		ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
		if (ret) {
			log(LOG_ERR, "post send error %d\n", ret);
			break;
		}
		DEBUG_LOG(PFX "server posted go ahead\n");
	}
}
static void krping_rlat_test_client(struct krping_cb *cb) { struct ib_send_wr *bad_wr; struct ib_wc wc; int ret; cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ if (cb->dma_mr) krping_format_send(cb, cb->start_addr, cb->dma_mr); else krping_format_send(cb, cb->start_addr, cb->rdma_mr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { log(LOG_ERR, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { log(LOG_ERR, "poll error %d\n", ret); return; } if (wc.status) { log(LOG_ERR, "send completion error %d\n", wc.status); return; } /* Spin waiting for server's Start STAG/TO/Len */ while (cb->state < RDMA_WRITE_ADV) { krping_cq_event_handler(cb->cq, cb); } #if 0 { int i; struct timeval start, stop; time_t sec; suseconds_t usec; unsigned long long elapsed; struct ib_wc wc; struct ib_send_wr *bad_wr; int ne; cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = 0; cb->rdma_sq_wr.num_sge = 0; microtime(&start); for (i=0; i < 100000; i++) { if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { log(LOG_ERR, "Couldn't post send\n"); return; } do { ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ne < 0) { log(LOG_ERR, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { log(LOG_ERR, "Completion wth error at %s:\n", cb->server ? "server" : "client"); log(LOG_ERR, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } } microtime(&stop); if (stop.tv_usec < start.tv_usec) { stop.tv_usec += 1000000; stop.tv_sec -= 1; } sec = stop.tv_sec - start.tv_sec; usec = stop.tv_usec - start.tv_usec; elapsed = sec * 1000000 + usec; log(LOG_ERR, "0B-write-lat iters 100000 usec %llu\n", elapsed); } #endif rlat_test(cb); }
static void krping_test_client(struct krping_cb *cb) { int ping, start, cc, i, ret; struct ib_send_wr *bad_wr; unsigned char c; start = 65; for (ping = 0; !cb->count || ping < cb->count; ping++) { cb->state = RDMA_READ_ADV; /* Put some ascii text in the buffer. */ cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping); for (i = cc, c = start; i < cb->size; i++) { cb->start_buf[i] = c; c++; if (c > 122) c = 65; } start++; if (start > 122) start = 65; cb->start_buf[cb->size - 1] = 0; if (cb->dma_mr) krping_format_send(cb, cb->start_addr, cb->dma_mr); else krping_format_send(cb, cb->start_addr, cb->start_mr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { log(LOG_ERR, "post send error %d\n", ret); break; } /* Wait for server to ACK */ krping_wait(cb, RDMA_WRITE_ADV); if (cb->state != RDMA_WRITE_ADV) { log(LOG_ERR, "wait for RDMA_WRITE_ADV state %d\n", cb->state); break; } if (cb->dma_mr) krping_format_send(cb, cb->rdma_addr, cb->dma_mr); else krping_format_send(cb, cb->rdma_addr, cb->rdma_mr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { log(LOG_ERR, "post send error %d\n", ret); break; } /* Wait for the server to say the RDMA Write is complete. */ krping_wait(cb, RDMA_WRITE_COMPLETE); if (cb->state != RDMA_WRITE_COMPLETE) { log(LOG_ERR, "wait for RDMA_WRITE_COMPLETE state %d\n", cb->state); break; } if (cb->validate) if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) { log(LOG_ERR, "data mismatch!\n"); break; } if (cb->verbose) DEBUG_LOG("ping data: %s\n", cb->rdma_buf); } }
/*
 * sdp_post_send - map an outgoing SDP packet and post it on the send
 * queue.
 *
 * Fills in the BSDH header fields (flags, buffer/credit counts, length,
 * sequence numbers), builds one SGE per mbuf in the chain, and posts a
 * signaled SEND. On post failure the mapping is torn down, the socket
 * is notified with ECONNRESET, and the mbuf chain is freed. On success
 * the tx ring head, credits, and remote_credits are updated.
 */
void
sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb)
{
	struct sdp_buf *tx_req;
	struct sdp_bsdh *h;
	unsigned long mseq;
	struct ib_device *dev;
	struct ib_send_wr *bad_wr;
	struct ib_sge ibsge[SDP_MAX_SEND_SGES];
	struct ib_sge *sge;
	struct ib_send_wr tx_wr = { NULL };
	int i, rc;
	u64 addr;

	/* NOTE(review): h is not assigned until the mtod() below, so this
	 * reference to h->mid (and mb->len, vs. the m_len used elsewhere)
	 * is only safe if these SDPSTATS macros compile to no-ops when
	 * stats are disabled -- confirm the macro definitions. */
	SDPSTATS_COUNTER_MID_INC(post_send, h->mid);
	SDPSTATS_HIST(send_size, mb->len);

	if (!ssk->qp_active) {
		m_freem(mb);
		return;
	}

	mseq = ring_head(ssk->tx_ring);
	h = mtod(mb, struct sdp_bsdh *);
	ssk->tx_packets++;
	ssk->tx_bytes += mb->m_pkthdr.len;

#ifdef SDP_ZCOPY
	if (unlikely(h->mid == SDP_MID_SRCAVAIL)) {
		struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(mb);
		if (ssk->tx_sa != tx_sa) {
			sdp_dbg_data(ssk->socket, "SrcAvail cancelled "
					"before being sent!\n");
			WARN_ON(1);
			m_freem(mb);
			return;
		}
		TX_SRCAVAIL_STATE(mb)->mseq = mseq;
	}
#endif

	if (unlikely(mb->m_flags & M_URG))
		h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
	else
		h->flags = 0;

	mb->m_flags |= M_RDONLY; /* Don't allow compression once sent. */
	h->bufs = htons(rx_ring_posted(ssk));
	h->len = htonl(mb->m_pkthdr.len);
	h->mseq = htonl(mseq);
	h->mseq_ack = htonl(mseq_ack(ssk));

	sdp_prf1(ssk->socket, mb, "TX: %s bufs: %d mseq:%ld ack:%d",
			mid2str(h->mid), rx_ring_posted(ssk), mseq,
			ntohl(h->mseq_ack));
	SDP_DUMP_PACKET(ssk->socket, "TX", mb, h);

	tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)];
	tx_req->mb = mb;
	dev = ssk->ib_device;
	sge = &ibsge[0];

	/* Build one SGE per mbuf in the chain; record each DMA mapping
	 * in tx_req so it can be unmapped on completion or error. */
	for (i = 0; mb != NULL; i++, mb = mb->m_next, sge++) {
		addr = ib_dma_map_single(dev, mb->m_data, mb->m_len,
		    DMA_TO_DEVICE);
		/* TODO: proper error handling */
		BUG_ON(ib_dma_mapping_error(dev, addr));
		BUG_ON(i >= SDP_MAX_SEND_SGES);
		tx_req->mapping[i] = addr;
		sge->addr = addr;
		sge->length = mb->m_len;
		sge->lkey = ssk->sdp_dev->mr->lkey;
	}
	tx_wr.next = NULL;
	tx_wr.wr_id = mseq | SDP_OP_SEND;
	tx_wr.sg_list = ibsge;
	tx_wr.num_sge = i;
	tx_wr.opcode = IB_WR_SEND;
	tx_wr.send_flags = IB_SEND_SIGNALED;
	if (unlikely(tx_req->mb->m_flags & M_URG))
		tx_wr.send_flags |= IB_SEND_SOLICITED;

	rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr);
	if (unlikely(rc)) {
		sdp_dbg(ssk->socket,
				"ib_post_send failed with status %d.\n", rc);
		sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE);
		sdp_notify(ssk, ECONNRESET);
		m_freem(tx_req->mb);
		return;
	}

	atomic_inc(&ssk->tx_ring.head);
	atomic_dec(&ssk->tx_ring.credits);
	atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
	return;
}