static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (wc->status != IB_WC_SUCCESS) {
		rep->rr_len = ~0U;
		goto out_schedule;
	}
	if (wc->opcode != IB_WC_RECV)
		return;

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
				   rep->rr_iov.addr,
				   rep->rr_len, DMA_FROM_DEVICE);

	if (rep->rr_len >= 16) {
		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
		unsigned int credits = ntohl(p->rm_credit);

		if (credits == 0)
			credits = 1;	/* don't deadlock */
		else if (credits > rep->rr_buffer->rb_max_requests)
			credits = rep->rr_buffer->rb_max_requests;
		atomic_set(&rep->rr_buffer->rb_credits, credits);
	}

out_schedule:
	rpcrdma_schedule_tasklet(rep);
}
int iser_send_control(struct iscsi_conn *conn,
		      struct iscsi_task *task)
{
	struct iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_tx_desc *mdesc = &iser_task->desc;
	unsigned long data_seg_len;
	int err = 0;
	struct iser_device *device;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
	iser_create_send_desc(iser_conn, mdesc);

	device = iser_conn->ib_conn.device;

	data_seg_len = ntoh24(task->hdr->dlength);

	if (data_seg_len > 0) {
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];

		if (task != conn->login_task) {
			iser_err("data present on non login task!!!\n");
			goto send_control_error;
		}

		ib_dma_sync_single_for_cpu(device->ib_device,
					   iser_conn->login_req_dma,
					   task->data_count, DMA_TO_DEVICE);

		memcpy(iser_conn->login_req_buf, task->data, task->data_count);

		ib_dma_sync_single_for_device(device->ib_device,
					      iser_conn->login_req_dma,
					      task->data_count, DMA_TO_DEVICE);

		tx_dsg->addr = iser_conn->login_req_dma;
		tx_dsg->length = task->data_count;
		tx_dsg->lkey = device->pd->local_dma_lkey;
		mdesc->num_sge = 2;
	}

	if (task == conn->login_task) {
		iser_dbg("op %x dsl %lx, posting login rx buffer\n",
			 task->hdr->opcode, data_seg_len);
		err = iser_post_recvl(iser_conn);
		if (err)
			goto send_control_error;
		err = iser_post_rx_bufs(conn, task->hdr);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
	if (!err)
		return 0;

send_control_error:
	iser_err("conn %p failed err %d\n", conn, err);
	return err;
}
void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_conn *ib_conn = wc->qp->qp_context;
	struct iser_conn *iser_conn = to_iser_conn(ib_conn);
	struct iser_login_desc *desc = iser_login(wc->wr_cqe);
	struct iscsi_hdr *hdr;
	char *data;
	int length;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		iser_err_comp(wc, "login_rsp");
		return;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
				   desc->rsp_dma, ISER_RX_LOGIN_SIZE,
				   DMA_FROM_DEVICE);

	hdr = desc->rsp + sizeof(struct iser_ctrl);
	data = desc->rsp + ISER_HEADERS_LEN;
	length = wc->byte_len - ISER_HEADERS_LEN;

	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
		 hdr->itt, length);

	iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      desc->rsp_dma, ISER_RX_LOGIN_SIZE,
				      DMA_FROM_DEVICE);

	ib_conn->post_recv_buf_count--;
}
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rep->rr_device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rep->rr_len, DMA_FROM_DEVICE);
	prefetch(rdmab_to_msg(rep->rr_rdmabuf));

out_schedule:
	queue_work(rpcrdma_receive_wq, &rep->rr_work);
	return;

out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC: %s: rep %p: %s\n",
		       __func__, rep, ib_wc_status_msg(wc->status));
	rep->rr_len = RPCRDMA_BAD_LEN;
	goto out_schedule;
}
static inline void
rpcrdma_event_process(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long) wc->wr_id;

	dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (!rep) /* send or bind completion that we don't care about */
		return;

	if (IB_WC_SUCCESS != wc->status) {
		dprintk("RPC: %s: %s WC status %X, connection lost\n",
			__func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send",
			wc->status);
		rep->rr_len = ~0U;
		rpcrdma_schedule_tasklet(rep);
		return;
	}

	switch (wc->opcode) {
	case IB_WC_RECV:
		rep->rr_len = wc->byte_len;
		ib_dma_sync_single_for_cpu(
			rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
		/* Keep (only) the most recent credits, after check validity */
		if (rep->rr_len >= 16) {
			struct rpcrdma_msg *p =
					(struct rpcrdma_msg *) rep->rr_base;
			unsigned int credits = ntohl(p->rm_credit);

			if (credits == 0) {
				dprintk("RPC: %s: server"
					" dropped credits to 0!\n", __func__);
				/* don't deadlock */
				credits = 1;
			} else if (credits > rep->rr_buffer->rb_max_requests) {
				dprintk("RPC: %s: server"
					" over-crediting: %d (%d)\n",
					__func__, credits,
					rep->rr_buffer->rb_max_requests);
				credits = rep->rr_buffer->rb_max_requests;
			}
			atomic_set(&rep->rr_buffer->rb_credits, credits);
		}
		/* fall through */
	case IB_WC_BIND_MW:
		rpcrdma_schedule_tasklet(rep);
		break;
	default:
		dprintk("RPC: %s: unexpected WC event %X\n",
			__func__, wc->opcode);
		break;
	}
}
static void isert_dma_sync_data_for_cpu(struct ib_device *ib_dev,
					struct ib_sge *sge, size_t size)
{
	size_t to_sync = size > (PAGE_SIZE - ISER_HDRS_SZ) ?
			 (PAGE_SIZE - ISER_HDRS_SZ) : size;

	ib_dma_sync_single_for_cpu(ib_dev, sge->addr + ISER_HDRS_SZ,
				   to_sync, DMA_FROM_DEVICE);
	size -= to_sync;

	while (size) {
		++sge;
		to_sync = size > PAGE_SIZE ? PAGE_SIZE : size;
		ib_dma_sync_single_for_cpu(ib_dev, sge->addr,
					   to_sync, DMA_FROM_DEVICE);
		size -= to_sync;
	}
}
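/*
 * Sketch only, not taken from the driver above: the helper covers only the
 * CPU-bound direction. If the same page-per-SGE data region is later handed
 * back to the HCA (for example when re-posting the receive buffer, as the
 * iSER receive paths elsewhere in this listing do), a mirror-image walk
 * using ib_dma_sync_single_for_device() would be needed. The function name
 * below is hypothetical; the layout assumptions (ISER_HDRS_SZ in the first
 * SGE, one page per following SGE) are copied from the helper above.
 */
static void isert_dma_sync_data_for_device(struct ib_device *ib_dev,
					   struct ib_sge *sge, size_t size)
{
	size_t to_sync = size > (PAGE_SIZE - ISER_HDRS_SZ) ?
			 (PAGE_SIZE - ISER_HDRS_SZ) : size;

	/* first SGE: skip the iSER headers, sync only the data portion */
	ib_dma_sync_single_for_device(ib_dev, sge->addr + ISER_HDRS_SZ,
				      to_sync, DMA_FROM_DEVICE);
	size -= to_sync;

	/* remaining SGEs each map one full page of data */
	while (size) {
		++sge;
		to_sync = size > PAGE_SIZE ? PAGE_SIZE : size;
		ib_dma_sync_single_for_device(ib_dev, sge->addr,
					      to_sync, DMA_FROM_DEVICE);
		size -= to_sync;
	}
}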
/* creates a new tx descriptor and adds header regd buffer */
static void iser_create_send_desc(struct iser_conn *iser_conn,
				  struct iser_tx_desc *tx_desc)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
				   tx_desc->dma_addr, ISER_HEADERS_LEN,
				   DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;
	tx_desc->num_sge = 1;
}
/**
 * iser_rcv_completion - recv DTO completion
 */
void iser_rcv_completion(struct iser_rx_desc *rx_desc,
			 unsigned long rx_xfer_len,
			 struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);
	struct iscsi_hdr *hdr;
	u64 rx_dma;
	int rx_buflen, outstanding, count, err;

	/* differentiate between login and all other PDUs */
	if ((char *)rx_desc == iser_conn->login_resp_buf) {
		rx_dma = iser_conn->login_resp_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
				   rx_buflen, DMA_FROM_DEVICE);

	hdr = &rx_desc->iscsi_header;

	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
		 hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));

	iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
			rx_xfer_len - ISER_HEADERS_LEN);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
				      rx_buflen, DMA_FROM_DEVICE);

	/* decrementing conn->post_recv_buf_count only --after-- freeing the   *
	 * task eliminates the need to worry on tasks which are completed in   *
	 * parallel to the execution of iser_conn_term. So the code that waits *
	 * for the posted rx bufs refcount to become zero handles everything   */
	ib_conn->post_recv_buf_count--;

	if (rx_dma == iser_conn->login_resp_dma)
		return;

	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <=
	    iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
			    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			iser_err("posting %d rx bufs err %d\n", count, err);
	}
}
void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_conn *ib_conn = wc->qp->qp_context;
	struct iser_conn *iser_conn = to_iser_conn(ib_conn);
	struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
	struct iscsi_hdr *hdr;
	int length;
	int outstanding, count, err;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		iser_err_comp(wc, "task_rsp");
		return;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
				   desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
				   DMA_FROM_DEVICE);

	hdr = &desc->iscsi_header;
	length = wc->byte_len - ISER_HEADERS_LEN;

	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
		 hdr->itt, length);

	if (iser_check_remote_inv(iser_conn, wc, hdr)) {
		iscsi_conn_failure(iser_conn->iscsi_conn,
				   ISCSI_ERR_CONN_FAILED);
		return;
	}

	iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
				      DMA_FROM_DEVICE);

	/* decrementing conn->post_recv_buf_count only --after-- freeing the   *
	 * task eliminates the need to worry on tasks which are completed in   *
	 * parallel to the execution of iser_conn_term. So the code that waits *
	 * for the posted rx bufs refcount to become zero handles everything   */
	ib_conn->post_recv_buf_count--;

	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <=
	    iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
			    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			iser_err("posting %d rx bufs err %d\n", count, err);
	}
}
/* creates a new tx descriptor and adds header regd buffer */
static void iser_create_send_desc(struct iser_conn *iser_conn,
				  struct iser_tx_desc *tx_desc)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
				   tx_desc->dma_addr, ISER_HEADERS_LEN,
				   DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
	}
}
/**
 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: Completion Queue context
 * @wc: Work Completion object
 *
 * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
 * the Receive completion handler could be running.
 */
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct svcxprt_rdma *rdma = cq->cq_context;
	struct ib_cqe *cqe = wc->wr_cqe;
	struct svc_rdma_recv_ctxt *ctxt;

	trace_svcrdma_wc_receive(wc);

	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
	ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);

	if (wc->status != IB_WC_SUCCESS)
		goto flushed;

	if (svc_rdma_post_recv(rdma))
		goto post_err;

	/* All wc fields are now known to be valid */
	ctxt->rc_byte_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
				   ctxt->rc_recv_sge.addr,
				   wc->byte_len, DMA_FROM_DEVICE);

	spin_lock(&rdma->sc_rq_dto_lock);
	list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
	spin_unlock(&rdma->sc_rq_dto_lock);
	set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
	if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
		svc_xprt_enqueue(&rdma->sc_xprt);
	goto out;

flushed:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
post_err:
	svc_rdma_recv_ctxt_put(rdma, ctxt);
	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
	svc_xprt_enqueue(&rdma->sc_xprt);
out:
	svc_xprt_put(&rdma->sc_xprt);
}
/**
 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
					       rr_cqe);

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	rep->rr_wc_flags = wc->wc_flags;
	rep->rr_inv_rkey = wc->ex.invalidate_rkey;

	ib_dma_sync_single_for_cpu(rep->rr_device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rep->rr_len, DMA_FROM_DEVICE);

	rpcrdma_update_granted_credits(rep);

out_schedule:
	queue_work(rpcrdma_receive_wq, &rep->rr_work);
	return;

out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
	rep->rr_len = RPCRDMA_BAD_LEN;
	goto out_schedule;
}
static void isert_recv_completion_handler(struct isert_wr *wr)
{
	struct isert_cmnd *pdu = wr->pdu;
	struct ib_sge *sge = wr->sge_list;
	struct ib_device *ib_dev = wr->isert_dev->ib_dev;
	int err;

	TRACE_ENTRY();

	ib_dma_sync_single_for_cpu(ib_dev, sge->addr, ISER_HDRS_SZ,
				   DMA_FROM_DEVICE);
	isert_rx_pdu_parse_headers(pdu);
	isert_dma_sync_data_for_cpu(ib_dev, sge,
				    pdu->iscsi.pdu.datasize +
				    pdu->iscsi.pdu.ahssize);

	switch (pdu->isert_opcode) {
	case ISER_ISCSI_CTRL:
		switch (pdu->iscsi_opcode) {
		case ISCSI_OP_NOP_OUT:
			err = isert_pdu_handle_nop_out(pdu);
			break;
		case ISCSI_OP_SCSI_CMD:
			err = isert_pdu_handle_scsi_cmd(pdu);
			break;
		case ISCSI_OP_SCSI_TASK_MGT_MSG:
			err = isert_pdu_handle_tm_func(pdu);
			break;
		case ISCSI_OP_LOGIN_CMD:
			err = isert_pdu_handle_login_req(pdu);
			break;
		case ISCSI_OP_TEXT_CMD:
			err = isert_pdu_handle_text(pdu);
			break;
		case ISCSI_OP_SCSI_DATA_OUT:
			err = isert_pdu_handle_data_out(pdu);
			break;
		case ISCSI_OP_LOGOUT_CMD:
			err = isert_pdu_handle_logout(pdu);
			break;
		case ISCSI_OP_SNACK_CMD:
			err = isert_pdu_handle_snack(pdu);
			break;
		default:
			pr_err("Unexpected iscsi opcode:0x%x\n",
			       pdu->iscsi_opcode);
			err = -EINVAL;
			break;
		}
		break;
	case ISER_HELLO:
		err = isert_pdu_handle_hello_req(pdu);
		break;
	default:
		pr_err("malformed isert_hdr, iser op:%x flags 0x%02x\n",
		       pdu->isert_opcode, pdu->isert_hdr->flags);
		err = -EINVAL;
		break;
	}

	if (unlikely(err)) {
		pr_err("err:%d while handling iser pdu\n", err);
		isert_conn_disconnect(wr->conn);
	}

	TRACE_EXIT();
}
int vnic_rx_skb(struct vnic_login *login, struct vnic_rx_ring *ring,
		struct ib_wc *wc, int ip_summed, char *eth_hdr_va)
{
	u64 wr_id = (unsigned int)wc->wr_id;
	struct sk_buff *skb;
	int used_frags;
	char *va = eth_hdr_va;
	int length = wc->byte_len - VNIC_EOIB_HDR_SIZE -
		     VNIC_VLAN_OFFSET(login),
	    linear_length = (length <= SMALL_PACKET_SIZE) ?
			    length : SMALL_PACKET_SIZE,
	    hdr_len = min(length, HEADER_COPY_SIZE),
	    offest = NET_IP_ALIGN + 16;
	struct ib_device *ib_dev = login->port->dev->ca;

	/* alloc a small linear SKB */
	skb = alloc_skb(linear_length + offest, GFP_ATOMIC);
	if (unlikely(!skb))
		return -ENOMEM;

	skb_record_rx_queue(skb, ring->index);
	skb_reserve(skb, offest);

	if (vnic_linear_small_pkt && length <= SMALL_PACKET_SIZE) {
		u64 dma;

		/* We are copying all relevant data to the skb - temporarily
		 * sync buffers for the copy
		 */
		dma = ring->rx_info[wr_id].dma_addr[0] + VNIC_EOIB_HDR_SIZE +
		      VNIC_VLAN_OFFSET(login);
		ib_dma_sync_single_for_cpu(ib_dev, dma, length,
					   DMA_FROM_DEVICE);
		skb_copy_to_linear_data(skb, va, length);
		ib_dma_sync_single_for_device(ib_dev, dma, length,
					      DMA_FROM_DEVICE);
		skb->tail += length;
	} else {
		/* unmap the needed fragments and reallocate them. Fragments
		 * that were not used will not be reused as is.
		 */
		used_frags = vnic_unmap_and_replace_rx(ring, ib_dev,
						       skb_shinfo(skb)->frags,
						       wr_id, wc->byte_len);
		if (!used_frags)
			goto free_and_repost;
		skb_shinfo(skb)->nr_frags = used_frags;

		/* Copy headers into the skb linear buffer */
		memcpy(skb->data, va, hdr_len);
		skb->tail += hdr_len;

		/* Skip headers in first fragment */
		skb_shinfo(skb)->frags[0].page_offset +=
			(VNIC_EOIB_HDR_SIZE + VNIC_VLAN_OFFSET(login) +
			 hdr_len);

		/* Adjust size of first fragment */
		skb_shinfo(skb)->frags[0].size -=
			(VNIC_EOIB_HDR_SIZE + VNIC_VLAN_OFFSET(login) +
			 hdr_len);
		skb->data_len = length - hdr_len;
	}

	/* update skb fields */
	skb->len = length;
	skb->truesize = length + sizeof(struct sk_buff);
	skb->ip_summed = ip_summed;
	skb->dev = login->dev;
	skb->protocol = eth_type_trans(skb, skb->dev);

	return vnic_rx(login, skb, wc);

free_and_repost:
	dev_kfree_skb(skb);
	return -ENODEV;
}
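/*
 * A minimal sketch, not from any of the drivers above, of the ownership
 * hand-off every receive path in this listing follows: map a buffer with
 * DMA_FROM_DEVICE, sync it for the CPU before reading the completed data,
 * and sync it back for the device before re-posting it. All demo_* names
 * are hypothetical; only the ib_dma_* calls are real kernel APIs from
 * <rdma/ib_verbs.h>.
 */
#include <rdma/ib_verbs.h>

struct demo_rx_buf {
	void	*vaddr;		/* kernel virtual address of the buffer */
	u64	dma_addr;	/* DMA address handed to the HCA */
	size_t	len;		/* full mapped length */
};

static int demo_rx_buf_map(struct ib_device *ibdev, struct demo_rx_buf *buf)
{
	buf->dma_addr = ib_dma_map_single(ibdev, buf->vaddr, buf->len,
					  DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(ibdev, buf->dma_addr))
		return -ENOMEM;
	return 0;	/* buffer is now owned by the device */
}

/* Called from the receive completion handler; byte_len comes from the WC */
static void demo_rx_buf_consume(struct ib_device *ibdev,
				struct demo_rx_buf *buf, u32 byte_len)
{
	/* give the CPU a coherent view before touching the payload */
	ib_dma_sync_single_for_cpu(ibdev, buf->dma_addr, byte_len,
				   DMA_FROM_DEVICE);

	/* ... parse headers / copy data out of buf->vaddr here ... */

	/* hand ownership back to the HCA before re-posting the recv WR */
	ib_dma_sync_single_for_device(ibdev, buf->dma_addr, buf->len,
				      DMA_FROM_DEVICE);
}

static void demo_rx_buf_unmap(struct ib_device *ibdev, struct demo_rx_buf *buf)
{
	ib_dma_unmap_single(ibdev, buf->dma_addr, buf->len, DMA_FROM_DEVICE);
}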