/*
 * iser_post_rx_bufs() - post the initial batch of receive buffers
 * @conn: iscsi connection
 * @req:  header of the login request that was just prepared
 *
 * Does nothing for intermediate login PDUs. Once the login request that
 * moves the connection to full-feature phase is seen, posts
 * ISER_MIN_POSTED_RX receive buffers.
 *
 * Returns 0 on success, -ENOMEM if posting the receive buffers fails.
 */
static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;

	iser_dbg("req op %x flags %x\n", req->opcode, req->flags);

	/* check if this is the last login - going to full feature phase */
	if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE)
		return 0;

	/*
	 * Check that there is one posted recv buffer (for the last login
	 * response) and no posted send buffers left - they must have been
	 * consumed during previous login phases.
	 */
	WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
	WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);

	iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);

	/* Initial post receive buffers */
	if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
		return -ENOMEM;

	return 0;
}
/*
 * iscsi_iser_mtask_xmit() - transmit a management (control) PDU
 * @conn: iscsi connection
 * @task: management task to send
 *
 * -ENOBUFS is treated as a flow-control condition and passed back to the
 * caller for retry; any other error escalates to a connection failure.
 */
static int iscsi_iser_mtask_xmit(struct iscsi_conn *conn,
				 struct iscsi_task *task)
{
	int ret;

	iser_dbg("task deq [cid %d itt 0x%x]\n", conn->id, task->itt);

	ret = iser_send_control(conn, task);

	/* anything except "try again later" is fatal for the connection */
	if (ret != 0 && ret != -ENOBUFS)
		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);

	return ret;
}
static int iscsi_iser_task_xmit(struct iscsi_task *task) { struct iscsi_conn *conn = task->conn; struct iscsi_iser_task *iser_task = task->dd_data; int error = 0; if (!task->sc) return iscsi_iser_mtask_xmit(conn, task); if (task->sc->sc_data_direction == DMA_TO_DEVICE) { BUG_ON(scsi_bufflen(task->sc) == 0); iser_dbg("cmd [itt %x total %d imm %d unsol_data %d\n", task->itt, scsi_bufflen(task->sc), task->imm_count, task->unsol_r2t.data_length); } iser_dbg("ctask xmit [cid %d itt 0x%x]\n", conn->id, task->itt); /* Send the cmd PDU */ if (!iser_task->command_sent) { error = iser_send_command(conn, task); if (error) goto iscsi_iser_task_xmit_exit; iser_task->command_sent = 1; } /* Send unsolicited data-out PDU(s) if necessary */ if (iscsi_task_has_unsol_data(task)) error = iscsi_iser_task_xmit_unsol_data(conn, task); iscsi_iser_task_xmit_exit: return error; }
/**
 * iscsi_iser_mtask_xmit() - xmit management (immediate) task
 * @conn: iscsi connection
 * @task: task management task
 *
 * Notes:
 *	The function can return -EAGAIN in which case caller must
 *	call it again later, or recover. '0' return code means successful
 *	xmit.
 **/
static int iscsi_iser_mtask_xmit(struct iscsi_conn *conn,
				 struct iscsi_task *task)
{
	iser_dbg("mtask xmit [cid %d itt 0x%x]\n", conn->id, task->itt);

	/*
	 * Since iser xmits control with zero copy, tasks can not be recycled
	 * right after sending them.
	 * The recycling scheme is based on whether a response is expected
	 * - if yes, the task is recycled at iscsi_complete_pdu
	 * - if no,  the task is recycled at iser_snd_completion
	 */
	return iser_send_control(conn, task);
}
/** * Unregister (previosuly registered using FMR) memory. * If memory is non-FMR does nothing. */ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir]; int ret; if (!reg->mem_h) return; iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h); ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); if (ret) iser_err("ib_fmr_pool_unmap failed %d\n", ret); reg->mem_h = NULL; }
/*
 * iser_fast_reg_mr() - register a data buffer using a fast-registration MR
 * @iser_task: task whose tx descriptor receives the work request(s)
 * @mem:       data buffer (scatterlist) to register
 * @rsc:       registration resources holding the MR to reuse
 * @reg:       output registration handle (lkey/rkey/addr/length)
 *
 * If the MR is still marked valid from a previous registration, a local
 * invalidate WR is chained first. The MR key is then advanced, the SG
 * list mapped onto the MR, and an IB_WR_REG_MR work request is appended
 * to the task's tx descriptor.
 *
 * Returns 0 on success, a negative errno if mapping the SG list fails.
 */
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *mem,
			    struct iser_reg_resources *rsc,
			    struct iser_mem_reg *reg)
{
	struct iser_tx_desc *tx_desc = &iser_task->desc;
	struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
	struct ib_mr *mr = rsc->mr;
	struct ib_reg_wr *wr;
	int n;

	/* invalidate the previous registration before reusing this MR */
	if (rsc->mr_valid)
		iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);

	/* advance the MR key prior to the new registration */
	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));

	n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K);
	if (unlikely(n != mem->size)) {
		/* partial mapping is as fatal as a hard failure */
		iser_err("failed to map sg (%d/%d)\n", n, mem->size);
		return n < 0 ? n : -EINVAL;
	}

	/* build the registration work request on the tx descriptor */
	wr = reg_wr(iser_tx_next_wr(tx_desc));
	wr->wr.opcode = IB_WR_REG_MR;
	wr->wr.wr_cqe = cqe;
	wr->wr.send_flags = 0;
	wr->wr.num_sge = 0;
	wr->mr = mr;
	wr->key = mr->rkey;
	wr->access = IB_ACCESS_LOCAL_WRITE |
		     IB_ACCESS_REMOTE_WRITE |
		     IB_ACCESS_REMOTE_READ;

	rsc->mr_valid = 1;

	reg->sge.lkey = mr->lkey;
	reg->rkey = mr->rkey;
	reg->sge.addr = mr->iova;
	reg->sge.length = mr->length;

	iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=0x%x\n",
		 reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length);

	return 0;
}
/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Data size is stored in
 * task->data[ISER_DIR_IN].data_len, Protection size
 * is stored in task->prot[ISER_DIR_IN].data_len
 */
static int iser_prepare_read_cmd(struct iscsi_task *task)
{
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_device *device = iser_task->iser_conn->ib_conn.device;
	struct iser_regd_buf *regd_buf;
	int err;
	struct iser_hdr *hdr = &iser_task->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];

	/* DMA-map the data buffer for the device-to-host transfer */
	err = iser_dma_map_task_data(iser_task, buf_in,
				     ISER_DIR_IN, DMA_FROM_DEVICE);
	if (err)
		return err;

	/* map the protection buffer as well, when the command carries one */
	if (scsi_prot_sg_count(iser_task->sc)) {
		struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN];

		err = iser_dma_map_task_data(iser_task, pbuf_in,
					     ISER_DIR_IN, DMA_FROM_DEVICE);
		if (err)
			return err;
	}

	/* register the buffer via the device-selected registration method */
	err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
	if (err) {
		iser_err("Failed to set up Data-IN RDMA\n");
		return err;
	}

	/* advertise the read stag/va in the iSER header */
	regd_buf = &iser_task->rdma_regd[ISER_DIR_IN];

	hdr->flags |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
	hdr->read_va = cpu_to_be64(regd_buf->reg.va);

	iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
		 task->itt, regd_buf->reg.rkey,
		 (unsigned long long)regd_buf->reg.va);

	return 0;
}
/* creates a new tx descriptor and adds header regd buffer */ static void iser_create_send_desc(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc) { struct iser_device *device = iser_conn->ib_conn.device; ib_dma_sync_single_for_cpu(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); tx_desc->iser_header.flags = ISER_VER; tx_desc->num_sge = 1; if (tx_desc->tx_sg[0].lkey != device->mr->lkey) { tx_desc->tx_sg[0].lkey = device->mr->lkey; iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc); } }
/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Data size is stored in
 * task->data[ISER_DIR_IN].data_len, Protection size
 * is stored in task->prot[ISER_DIR_IN].data_len
 */
static int iser_prepare_read_cmd(struct iscsi_task *task)
{
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_mem_reg *mem_reg;
	int err;
	struct iser_ctrl *hdr = &iser_task->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];

	/* DMA-map the data buffer for the device-to-host transfer */
	err = iser_dma_map_task_data(iser_task, buf_in,
				     ISER_DIR_IN, DMA_FROM_DEVICE);
	if (err)
		return err;

	/* map the protection buffer as well, when the command carries one */
	if (scsi_prot_sg_count(iser_task->sc)) {
		struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN];

		err = iser_dma_map_task_data(iser_task, pbuf_in,
					     ISER_DIR_IN, DMA_FROM_DEVICE);
		if (err)
			return err;
	}

	/* register for RDMA; 'false' selects the non-remote-inv path here */
	err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false);
	if (err) {
		iser_err("Failed to set up Data-IN RDMA\n");
		return err;
	}

	/* advertise the read stag/va in the iSER header */
	mem_reg = &iser_task->rdma_reg[ISER_DIR_IN];

	hdr->flags |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(mem_reg->rkey);
	hdr->read_va = cpu_to_be64(mem_reg->sge.addr);

	iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
		 task->itt, mem_reg->rkey,
		 (unsigned long long)mem_reg->sge.addr);

	return 0;
}
/*
 * iser_fast_reg_fmr() - register a data buffer through the FMR pool
 * @iser_task: task whose buffer is being registered
 * @mem:       data buffer (scatterlist) to register
 * @rsc:       registration resources (page vector + FMR pool)
 * @reg:       output registration handle (lkey/rkey/addr/length/mem_h)
 *
 * Converts the scatterlist into a vector of SIZE_4K pages and maps it
 * through ib_fmr_pool_map_phys(). The pool handle is stored in
 * reg->mem_h for a later iser_unreg_mem_fmr().
 *
 * Returns 0 on success, -EINVAL if the SG list cannot be fully paged,
 * or the error from the FMR pool mapping.
 */
static int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
			     struct iser_data_buf *mem,
			     struct iser_reg_resources *rsc,
			     struct iser_mem_reg *reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct iser_page_vec *page_vec = rsc->page_vec;
	struct ib_fmr_pool *fmr_pool = rsc->fmr_pool;
	struct ib_pool_fmr *fmr;
	int ret, plen;

	/* collect the page addresses via the iser_set_page callback */
	page_vec->npages = 0;
	page_vec->fake_mr.page_size = SIZE_4K;
	plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
			      mem->size, NULL, iser_set_page);
	if (unlikely(plen < mem->size)) {
		iser_err("page vec too short to hold this SG\n");
		iser_data_buf_dump(mem, device->ib_device);
		iser_dump_page_vec(page_vec);
		return -EINVAL;
	}

	fmr = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages,
				   page_vec->npages, page_vec->pages[0]);
	if (IS_ERR(fmr)) {
		ret = PTR_ERR(fmr);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
		return ret;
	}

	reg->sge.lkey = fmr->fmr->lkey;
	reg->rkey = fmr->fmr->rkey;
	reg->sge.addr = page_vec->fake_mr.iova;
	reg->sge.length = page_vec->fake_mr.length;
	reg->mem_h = fmr; /* kept so iser_unreg_mem_fmr() can unmap later */

	iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
		 " length=0x%x\n",
		 reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length);

	return 0;
}
/** * iser_reg_page_vec - Register physical memory * * returns: 0 on success, errno code on failure */ int iser_reg_page_vec(struct iser_conn *ib_conn, struct iser_page_vec *page_vec, struct iser_mem_reg *mem_reg) { struct ib_pool_fmr *mem; u64 io_addr; u64 *page_list; int status; page_list = page_vec->pages; io_addr = page_list[0]; mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool, page_list, page_vec->length, io_addr); if (IS_ERR(mem)) { status = (int)PTR_ERR(mem); iser_err("ib_fmr_pool_map_phys failed: %d\n", status); return status; } mem_reg->lkey = mem->fmr->lkey; mem_reg->rkey = mem->fmr->rkey; mem_reg->len = page_vec->length * SIZE_4K; mem_reg->va = io_addr; mem_reg->is_fmr = 1; mem_reg->mem_h = (void *)mem; mem_reg->va += page_vec->offset; mem_reg->len = page_vec->data_size; iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, " "entry[0]: (0x%08lx,%ld)] -> " "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n", page_vec, page_vec->length, (unsigned long)page_vec->pages[0], (unsigned long)page_vec->data_size, (unsigned int)mem_reg->lkey, mem_reg->mem_h, (unsigned long)mem_reg->va, (unsigned long)mem_reg->len); return 0; }
/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Total data size is stored in
 * iser_ctask->data[ISER_DIR_IN].data_len
 */
static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask,
				 unsigned int edtl)
{
	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
	struct iser_regd_buf *regd_buf;
	int err;
	struct iser_hdr *hdr = &iser_ctask->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_ctask->data[ISER_DIR_IN];

	/* DMA-map the data buffer for the device-to-host transfer */
	err = iser_dma_map_task_data(iser_ctask, buf_in,
				     ISER_DIR_IN, DMA_FROM_DEVICE);
	if (err)
		return err;

	/* the mapped buffer must be able to satisfy the expected
	 * data transfer length (EDTL) of the command */
	if (edtl > iser_ctask->data[ISER_DIR_IN].data_len) {
		iser_err("Total data length: %ld, less than EDTL: "
			 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
			 iser_ctask->data[ISER_DIR_IN].data_len, edtl,
			 ctask->itt, iser_ctask->iser_conn);
		return -EINVAL;
	}

	err = iser_reg_rdma_mem(iser_ctask, ISER_DIR_IN);
	if (err) {
		iser_err("Failed to set up Data-IN RDMA\n");
		return err;
	}

	/* advertise the read stag/va in the iSER header */
	regd_buf = &iser_ctask->rdma_regd[ISER_DIR_IN];

	hdr->flags |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
	hdr->read_va = cpu_to_be64(regd_buf->reg.va);

	iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
		 ctask->itt, regd_buf->reg.rkey,
		 (unsigned long long)regd_buf->reg.va);

	return 0;
}
/** * iscsi_iser_mtask_xmit - xmit management(immediate) task * @conn: iscsi connection * @task: task management task * * Notes: * The function can return -EAGAIN in which case caller must * call it again later, or recover. '0' return code means successful * xmit. * **/ static int iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task) { int error = 0; iser_dbg("task deq [cid %d itt 0x%x]\n", conn->id, task->itt); error = iser_send_control(conn, task); /* since iser xmits control with zero copy, tasks can not be recycled * right after sending them. * The recycling scheme is based on whether a response is expected * - if yes, the task is recycled at iscsi_complete_pdu * - if no, the task is recycled at iser_snd_completion */ if (error && error != -ENOBUFS) iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); return error; }
/** * iser_conn_set_full_featured_mode - (iSER API) */ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) { struct iscsi_iser_conn *iser_conn = conn->dd_data; iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX); /* Check that there is no posted recv or send buffers left - */ /* they must be consumed during the login phase */ BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0); BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); if (iser_alloc_rx_descriptors(iser_conn->ib_conn)) return -ENOMEM; /* Initial post receive buffers */ if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX)) return -ENOMEM; return 0; }
/*
 * iser_init() - module entry point
 *
 * Validates module parameters, creates the tx-descriptor slab cache,
 * initializes global lists/locks, and registers the iSER transport
 * with the iSCSI layer. Returns 0 on success, negative errno otherwise.
 */
static int __init iser_init(void)
{
	iser_dbg("Starting iSER datamover...\n");

	if (iscsi_max_lun < 1) {
		iser_err("Invalid max_lun value of %u\n", iscsi_max_lun);
		return -EINVAL;
	}

	memset(&ig, 0, sizeof(struct iser_global));

	ig.desc_cache = kmem_cache_create("iser_descriptors",
					  sizeof(struct iser_tx_desc),
					  0, SLAB_HWCACHE_ALIGN, NULL);
	if (!ig.desc_cache)
		return -ENOMEM;

	/* device init is called only after the first addr resolution */
	mutex_init(&ig.device_list_mutex);
	INIT_LIST_HEAD(&ig.device_list);
	mutex_init(&ig.connlist_mutex);
	INIT_LIST_HEAD(&ig.connlist);

	iscsi_iser_scsi_transport =
		iscsi_register_transport(&iscsi_iser_transport);
	if (!iscsi_iser_scsi_transport) {
		iser_err("iscsi_register_transport failed\n");
		/* only one resource to release, so unwind inline */
		kmem_cache_destroy(ig.desc_cache);
		return -EINVAL;
	}

	return 0;
}
/*
 * iser_reg_dma() - "register" a single contiguous DMA entry
 * @device: iser device whose PD/MR keys are used
 * @mem:    data buffer; only the first scatterlist entry is consumed
 * @reg:    output registration handle
 *
 * No actual registration work request is needed: the buffer is a single
 * DMA-mapped entry, so the PD's local DMA lkey and the device MR's rkey
 * (when a device MR exists) are used directly. Always returns 0.
 */
static int iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
			struct iser_mem_reg *reg)
{
	struct scatterlist *sg = mem->sg;

	reg->sge.lkey = device->pd->local_dma_lkey;
	/*
	 * FIXME: rework the registration code path to differentiate
	 * rkey/lkey use cases
	 */
	reg->rkey = device->mr ? device->mr->rkey : 0;
	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);

	iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
		 " length=0x%x\n",
		 reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length);

	return 0;
}
/*
 * iser_check_remote_inv() - handle a remote invalidation on a completion
 * @iser_conn: connection the completion arrived on
 * @wc:        the receive work completion
 * @hdr:       iscsi header of the received PDU
 *
 * If the completion carries IB_WC_WITH_INVALIDATE, verify that we
 * actually asked for send-with-invalidate on this connection, then mark
 * the matching task's registration descriptor(s) as invalidated.
 *
 * Returns 0 on success, -EPROTO if the peer invalidated without being
 * asked to, -EINVAL if no task matches the PDU's itt.
 */
static int iser_check_remote_inv(struct iser_conn *iser_conn,
				 struct ib_wc *wc,
				 struct iscsi_hdr *hdr)
{
	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
		struct iscsi_task *task;
		u32 rkey = wc->ex.invalidate_rkey;

		iser_dbg("conn %p: remote invalidation for rkey %#x\n",
			 iser_conn, rkey);

		/* the peer may only invalidate if we negotiated it */
		if (unlikely(!iser_conn->snd_w_inv)) {
			iser_err("conn %p: unexepected remote invalidation, "
				 "terminating connection\n", iser_conn);
			return -EPROTO;
		}

		task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt);
		if (likely(task)) {
			struct iscsi_iser_task *iser_task = task->dd_data;
			struct iser_fr_desc *desc;

			/* mark whichever direction(s) used the rkey */
			if (iser_task->dir[ISER_DIR_IN]) {
				desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
				iser_inv_desc(desc, rkey);
			}

			if (iser_task->dir[ISER_DIR_OUT]) {
				desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
				iser_inv_desc(desc, rkey);
			}
		} else {
			iser_err("failed to get task for itt=%d\n", hdr->itt);
			return -EINVAL;
		}
	}

	return 0;
}
/*
 * iser_cq_tasklet_fn() - tasklet draining the device's receive CQ
 * @data: the iser_device, cast to unsigned long for the tasklet API
 *
 * Polls receive completions one at a time. Successful IB_WC_RECV
 * completions are handed to iser_rcv_completion(); failed ones decrement
 * the posted-recv count and trigger error handling. Every 64 rx
 * completions the tx CQ is drained as well, and once more after the rx
 * CQ is empty and re-armed.
 */
static void iser_cq_tasklet_fn(unsigned long data)
{
	struct iser_device *device = (struct iser_device *)data;
	struct ib_cq *cq = device->rx_cq;
	struct ib_wc wc;
	struct iser_rx_desc *desc;
	unsigned long xfer_len;
	struct iser_conn *ib_conn;
	int completed_tx, completed_rx;

	completed_tx = completed_rx = 0;

	while (ib_poll_cq(cq, 1, &wc) == 1) {
		/* the rx descriptor pointer was stashed in wr_id at post time */
		desc = (struct iser_rx_desc *)(unsigned long)wc.wr_id;
		BUG_ON(desc == NULL);
		ib_conn = wc.qp->qp_context;

		if (wc.status == IB_WC_SUCCESS) {
			if (wc.opcode == IB_WC_RECV) {
				xfer_len = (unsigned long)wc.byte_len;
				iser_rcv_completion(desc, xfer_len, ib_conn);
			} else
				iser_err("expected opcode %d got %d\n",
					 IB_WC_RECV, wc.opcode);
		} else {
			/* flush errors are routine during teardown - stay quiet */
			if (wc.status != IB_WC_WR_FLUSH_ERR)
				iser_err("rx id %llx status %d vend_err %x\n",
					 wc.wr_id, wc.status, wc.vendor_err);
			ib_conn->post_recv_buf_count--;
			iser_handle_comp_error(NULL, ib_conn);
		}
		completed_rx++;
		/* drain the tx CQ periodically so sends don't starve */
		if (!(completed_rx & 63))
			completed_tx += iser_drain_tx_cq(device);
	}
	/*
	 * NOTE: it is assumed here that arming the CQ only once it is empty
	 * would not cause interrupts to be missed.
	 */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

	completed_tx += iser_drain_tx_cq(device);
	iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}
/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of resulting physical address array (may be less than
 * the original due to possible compaction).
 *
 * we build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other then the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code supports also the weird case
 * where --few fragments of the same page-- are present in the SG as
 * consecutive elements. Also, it handles one entry SG.
 */
static int iser_sg_to_page_vec(struct iser_data_buf *data,
			       struct iser_page_vec *page_vec)
{
	struct scatterlist *sg = (struct scatterlist *)data->buf;
	dma_addr_t first_addr, last_addr, page;
	int start_aligned, end_aligned;
	unsigned int cur_page = 0;
	unsigned long total_sz = 0;
	int i;

	/* compute the offset of first element */
	page_vec->offset = (u64) sg[0].offset;

	for (i = 0; i < data->dma_nents; i++) {
		total_sz += sg_dma_len(&sg[i]);

		first_addr = sg_dma_address(&sg[i]);
		last_addr = first_addr + sg_dma_len(&sg[i]);

		start_aligned = !(first_addr & ~PAGE_MASK);
		end_aligned = !(last_addr & ~PAGE_MASK);

		/* continue to collect page fragments till aligned or SG ends */
		while (!end_aligned && (i + 1 < data->dma_nents)) {
			i++;
			total_sz += sg_dma_len(&sg[i]);
			last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]);
			end_aligned = !(last_addr & ~PAGE_MASK);
		}

		/* emit one entry per page covered by [first_addr, last_addr) */
		first_addr = first_addr & PAGE_MASK;

		for (page = first_addr; page < last_addr; page += PAGE_SIZE)
			page_vec->pages[cur_page++] = page;

	}
	page_vec->data_size = total_sz;
	iser_dbg("page_vec->data_size:%d cur_page %d\n",
		 page_vec->data_size, cur_page);
	return cur_page;
}
/**
 * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
 * for RDMA sub-list of a scatter-gather list of memory buffers, and returns
 * the number of entries which are aligned correctly. Supports the case where
 * consecutive SG elements are actually fragments of the same physical page.
 */
static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
					      struct ib_device *ibdev)
{
	struct scatterlist *sg;
	u64 end_addr, next_addr;
	int i, cnt;
	unsigned int ret_len = 0;

	sg = (struct scatterlist *)data->buf;

	for (cnt = 0, i = 0; i < data->dma_nents; i++, cnt++) {
		end_addr = ib_sg_dma_address(ibdev, &sg[i]) +
			   ib_sg_dma_len(ibdev, &sg[i]);

		if (i + 1 < data->dma_nents) {
			next_addr = ib_sg_dma_address(ibdev, &sg[i+1]);
			/* are i, i+1 fragments of the same page? */
			if (end_addr == next_addr)
				continue;
			else if (!IS_4K_ALIGNED(end_addr)) {
				/* a misaligned boundary ends the usable prefix
				 * at this entry (inclusive) */
				ret_len = cnt + 1;
				break;
			}
		}
	}
	/* the loop ran to completion: the whole list is usable */
	if (i == data->dma_nents)
		ret_len = cnt;	/* loop ended */
	iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
		 ret_len, data->dma_nents, data);
	return ret_len;
}
/** * iser_reg_rdma_mem - Registers memory intended for RDMA, * obtaining rkey and va * * returns 0 on success, errno code on failure */ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, enum iser_data_dir cmd_dir) { struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_ctask->data[cmd_dir]; struct iser_regd_buf *regd_buf; int aligned_len; int err; int i; struct scatterlist *sg; regd_buf = &iser_ctask->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { iser_err("rdma alignment violation %d/%d aligned\n", aligned_len, mem->size); iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ iser_dma_unmap_task_data(iser_ctask); /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy */ if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0) return -ENOMEM; mem = &iser_ctask->data_copy[cmd_dir]; } /* if there a single dma entry, FMR is not needed */ if (mem->dma_nents == 1) { sg = (struct scatterlist *)mem->buf; regd_buf->reg.lkey = device->mr->lkey; regd_buf->reg.rkey = device->mr->rkey; regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]); regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]); regd_buf->reg.is_fmr = 0; iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X " "va: 0x%08lX sz: %ld]\n", (unsigned int)regd_buf->reg.lkey, (unsigned int)regd_buf->reg.rkey, (unsigned long)regd_buf->reg.va, (unsigned long)regd_buf->reg.len); } else { /* use FMR for multiple dma entries */ iser_page_vec_build(mem, ib_conn->page_vec, ibdev); err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); if (err) { iser_data_buf_dump(mem, ibdev); iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, ntoh24(iser_ctask->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, 
offset = 0x%x\n", ib_conn->page_vec->data_size, ib_conn->page_vec->length, ib_conn->page_vec->offset); for (i=0 ; i<ib_conn->page_vec->length ; i++) iser_err("page_vec[%d] = 0x%llx\n", i, (unsigned long long) ib_conn->page_vec->pages[i]); return err; } } /* take a reference on this regd buf such that it will not be released * * (eg in send dto completion) before we get the scsi response */ atomic_inc(®d_buf->ref_count); return 0; }
/**
 * iser_rcv_completion - recv DTO completion handler
 * @rx_desc:      the completed receive descriptor
 * @dto_xfer_len: number of bytes received (headers + optional data)
 *
 * Routes the received PDU to the iscsi layer; for SCSI command responses
 * the matching task is looked up, marked completed and its RDMA resources
 * finalized. Receive bookkeeping happens after the descriptor is freed
 * (see the comment near the atomic_dec below).
 */
void iser_rcv_completion(struct iser_desc *rx_desc,
			 unsigned long dto_xfer_len)
{
	struct iser_dto *dto = &rx_desc->dto;
	struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
	struct iscsi_task *task;
	struct iscsi_iser_task *iser_task;
	struct iscsi_hdr *hdr;
	char *rx_data = NULL;
	int rx_data_len = 0;
	unsigned char opcode;

	hdr = &rx_desc->iscsi_header;

	iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode, hdr->itt);

	if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */
		rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN;
		rx_data = dto->regd[1]->virt_addr;
		rx_data += dto->offset[1];
	}

	opcode = hdr->opcode & ISCSI_OPCODE_MASK;

	if (opcode == ISCSI_OP_SCSI_CMD_RSP) {
		/* take a task reference under the session lock so the task
		 * cannot go away while we finalize its RDMA resources */
		spin_lock(&conn->iscsi_conn->session->lock);
		task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt);
		if (task)
			__iscsi_get_task(task);
		spin_unlock(&conn->iscsi_conn->session->lock);

		if (!task)
			iser_err("itt can't be matched to task!!! "
				 "conn %p opcode %d itt %d\n",
				 conn->iscsi_conn, opcode, hdr->itt);
		else {
			iser_task = task->dd_data;
			iser_dbg("itt %d task %p\n", hdr->itt, task);
			iser_task->status = ISER_TASK_STATUS_COMPLETED;
			iser_task_rdma_finalize(iser_task);
			iscsi_put_task(task);
		}
	}
	iser_dto_buffs_release(dto);

	iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len);

	kfree(rx_desc->data);
	kmem_cache_free(ig.desc_cache, rx_desc);

	/* decrementing conn->post_recv_buf_count only --after-- freeing the
	 * task eliminates the need to worry on tasks which are completed in
	 * parallel to the execution of iser_conn_term. So the code that waits
	 * for the posted rx bufs refcount to become zero handles everything */
	atomic_dec(&conn->ib_conn->post_recv_buf_count);

	/*
	 * if an unexpected PDU was received then the recv wr consumed must
	 * be replaced, this is done in the next send of a control-type PDU
	 */
	if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) {
		/* nop-in with itt = 0xffffffff */
		atomic_inc(&conn->ib_conn->unexpected_pdu_count);
	} else if (opcode == ISCSI_OP_ASYNC_EVENT) {
		/* asynchronous message */
		atomic_inc(&conn->ib_conn->unexpected_pdu_count);
	}
	/* a reject PDU consumes the recv buf posted for the response */
}
/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Data size is stored in
 * task->data[ISER_DIR_OUT].data_len, Protection size
 * is stored at task->prot[ISER_DIR_OUT].data_len
 */
static int iser_prepare_write_cmd(struct iscsi_task *task,
				  unsigned int imm_sz,
				  unsigned int unsol_sz,
				  unsigned int edtl)
{
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_mem_reg *mem_reg;
	int err;
	struct iser_hdr *hdr = &iser_task->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
	struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];

	/* DMA-map the data buffer for the host-to-device transfer */
	err = iser_dma_map_task_data(iser_task, buf_out,
				     ISER_DIR_OUT, DMA_TO_DEVICE);
	if (err)
		return err;

	/* map the protection buffer as well, when the command carries one */
	if (scsi_prot_sg_count(iser_task->sc)) {
		struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT];

		err = iser_dma_map_task_data(iser_task, pbuf_out,
					     ISER_DIR_OUT, DMA_TO_DEVICE);
		if (err)
			return err;
	}

	err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
	if (err != 0) {
		iser_err("Failed to register write cmd RDMA mem\n");
		return err;
	}

	mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];

	/* solicited data remains: advertise write stag/va past the
	 * unsolicited portion */
	if (unsol_sz < edtl) {
		hdr->flags |= ISER_WSV;
		hdr->write_stag = cpu_to_be32(mem_reg->rkey);
		hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);

		iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
			 "VA:%#llX + unsol:%d\n",
			 task->itt, mem_reg->rkey,
			 (unsigned long long)mem_reg->sge.addr, unsol_sz);
	}

	/* immediate data rides along in a second SG entry of the send */
	if (imm_sz > 0) {
		iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
			 task->itt, imm_sz);
		tx_dsg->addr = mem_reg->sge.addr;
		tx_dsg->length = imm_sz;
		tx_dsg->lkey = mem_reg->sge.lkey;
		iser_task->desc.num_sge = 2;
	}

	return 0;
}
/**
 * iser_send_data_out - send data out PDU
 * @conn: iscsi connection
 * @task: task the data-out belongs to
 * @hdr:  data-out PDU header (carries itt, dlength, offset)
 *
 * Allocates a tx descriptor, points its second SG entry at the
 * already-registered output buffer at the PDU's offset, and posts the
 * send. Returns 0 on success, negative errno on failure.
 */
int iser_send_data_out(struct iscsi_conn *conn,
		       struct iscsi_task *task,
		       struct iscsi_data *hdr)
{
	struct iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_tx_desc *tx_desc = NULL;
	struct iser_mem_reg *mem_reg;
	unsigned long buf_offset;
	unsigned long data_seg_len;
	uint32_t itt;
	int err;
	struct ib_sge *tx_dsg;

	itt = (__force uint32_t)hdr->itt;
	data_seg_len = ntoh24(hdr->dlength);
	buf_offset = ntohl(hdr->offset);

	iser_dbg("%s itt %d dseg_len %d offset %d\n",
		 __func__, (int)itt, (int)data_seg_len, (int)buf_offset);

	tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
	if (tx_desc == NULL) {
		iser_err("Failed to alloc desc for post dataout\n");
		return -ENOMEM;
	}

	tx_desc->type = ISCSI_TX_DATAOUT;
	tx_desc->iser_header.flags = ISER_VER;
	memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));

	/* build the tx desc */
	err = iser_initialize_task_headers(task, tx_desc);
	if (err)
		goto send_data_out_error;

	/* point the second SG entry into the registered output buffer */
	mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
	tx_dsg = &tx_desc->tx_sg[1];
	tx_dsg->addr = mem_reg->sge.addr + buf_offset;
	tx_dsg->length = data_seg_len;
	tx_dsg->lkey = mem_reg->sge.lkey;
	tx_desc->num_sge = 2;

	/* the segment must fit inside the task's output buffer */
	if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
		iser_err("Offset:%ld & DSL:%ld in Data-Out "
			 "inconsistent with total len:%ld, itt:%d\n",
			 buf_offset, data_seg_len,
			 iser_task->data[ISER_DIR_OUT].data_len, itt);
		err = -EINVAL;
		goto send_data_out_error;
	}
	iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
		 itt, buf_offset, data_seg_len);

	err = iser_post_send(&iser_conn->ib_conn, tx_desc, true);
	if (!err)
		return 0;

send_data_out_error:
	kmem_cache_free(ig.desc_cache, tx_desc);
	iser_err("conn %p failed err %d\n", conn, err);
	return err;
}
/** * iser_send_data_out - send data out PDU */ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, struct iscsi_data *hdr) { struct iscsi_iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; struct iser_desc *tx_desc = NULL; struct iser_dto *send_dto = NULL; unsigned long buf_offset; unsigned long data_seg_len; unsigned int itt; int err = 0; if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); return -EPERM; } if (iser_check_xmit(conn, ctask)) return -ENOBUFS; itt = ntohl(hdr->itt); data_seg_len = ntoh24(hdr->dlength); buf_offset = ntohl(hdr->offset); iser_dbg("%s itt %d dseg_len %d offset %d\n", __func__,(int)itt,(int)data_seg_len,(int)buf_offset); tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); if (tx_desc == NULL) { iser_err("Failed to alloc desc for post dataout\n"); return -ENOMEM; } tx_desc->type = ISCSI_TX_DATAOUT; memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); /* build the tx desc regd header and add it to the tx desc dto */ send_dto = &tx_desc->dto; send_dto->ctask = iser_ctask; iser_create_send_desc(iser_conn, tx_desc); iser_reg_single(iser_conn->ib_conn->device, send_dto->regd[0], DMA_TO_DEVICE); /* all data was registered for RDMA, we can use the lkey */ iser_dto_add_regd_buff(send_dto, &iser_ctask->rdma_regd[ISER_DIR_OUT], buf_offset, data_seg_len); if (buf_offset + data_seg_len > iser_ctask->data[ISER_DIR_OUT].data_len) { iser_err("Offset:%ld & DSL:%ld in Data-Out " "inconsistent with total len:%ld, itt:%d\n", buf_offset, data_seg_len, iser_ctask->data[ISER_DIR_OUT].data_len, itt); err = -EINVAL; goto send_data_out_error; } iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n", itt, buf_offset, data_seg_len); err = iser_post_send(tx_desc); if (!err) return 0; send_data_out_error: iser_dto_buffs_release(send_dto); kmem_cache_free(ig.desc_cache, tx_desc); iser_err("conn %p failed err 
%d\n",conn, err); return err; }
/*
 * iser_send_control() - send a control-type (non-data) PDU
 * @conn: iscsi connection
 * @task: control task to send
 *
 * Builds the tx descriptor for the control PDU. For a login task
 * carrying data, the payload is copied into the dedicated login buffer
 * and attached as a second SG entry; the login receive buffer and, for
 * the final login, the initial rx buffers are posted before the send.
 *
 * Returns 0 on success, negative errno on failure.
 */
int iser_send_control(struct iscsi_conn *conn,
		      struct iscsi_task *task)
{
	struct iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_tx_desc *mdesc = &iser_task->desc;
	unsigned long data_seg_len;
	int err = 0;
	struct iser_device *device;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
	mdesc->cqe.done = iser_ctrl_comp;
	iser_create_send_desc(iser_conn, mdesc);

	device = iser_conn->ib_conn.device;

	data_seg_len = ntoh24(task->hdr->dlength);

	if (data_seg_len > 0) {
		struct iser_login_desc *desc = &iser_conn->login_desc;
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];

		/* only the login task is allowed to carry a data segment */
		if (task != conn->login_task) {
			iser_err("data present on non login task!!!\n");
			goto send_control_error;
		}

		/* copy the payload into the pre-mapped login request buffer,
		 * syncing ownership around the CPU access */
		ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
					   task->data_count, DMA_TO_DEVICE);

		memcpy(desc->req, task->data, task->data_count);

		ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
					      task->data_count, DMA_TO_DEVICE);

		tx_dsg->addr = desc->req_dma;
		tx_dsg->length = task->data_count;
		tx_dsg->lkey = device->pd->local_dma_lkey;
		mdesc->num_sge = 2;
	}

	if (task == conn->login_task) {
		iser_dbg("op %x dsl %lx, posting login rx buffer\n",
			 task->hdr->opcode, data_seg_len);
		/* post the buffer that will receive the login response */
		err = iser_post_recvl(iser_conn);
		if (err)
			goto send_control_error;
		/* on the final login, also post the initial rx buffers */
		err = iser_post_rx_bufs(conn, task->hdr);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
	if (!err)
		return 0;

send_control_error:
	iser_err("conn %p failed err %d\n", conn, err);
	return err;
}
/*
 * iser_exit() - module exit point
 *
 * Unregisters the iSER transport from the iSCSI layer, then destroys
 * the tx-descriptor slab cache (in that order, so no new descriptors
 * can be allocated after the transport is gone).
 */
static void __exit iser_exit(void)
{
	iser_dbg("Removing iSER datamover...\n");
	iscsi_unregister_transport(&iscsi_iser_transport);
	kmem_cache_destroy(ig.desc_cache);
}