Example #1
/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (which may
 * be shorter than the original due to compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code also supports the odd case
 * where several fragments of the same page are present in the SG as
 * consecutive elements. It also handles a single-entry SG.
 */
static int iser_sg_to_page_vec(struct iser_data_buf *data,
			       struct iser_page_vec *page_vec,
			       struct ib_device *ibdev)
{
	struct scatterlist *sg = (struct scatterlist *)data->buf;
	u64 first_addr, last_addr, page;
	int end_aligned;
	unsigned int cur_page = 0;
	unsigned long total_sz = 0;
	int i;

	/* compute the offset of first element */
	page_vec->offset = (u64) sg[0].offset & ~MASK_4K;

	for (i = 0; i < data->dma_nents; i++) {
		unsigned int dma_len = ib_sg_dma_len(ibdev, &sg[i]);

		total_sz += dma_len;

		first_addr = ib_sg_dma_address(ibdev, &sg[i]);
		last_addr  = first_addr + dma_len;

		end_aligned   = !(last_addr  & ~MASK_4K);

		/* continue to collect page fragments till aligned or SG ends */
		while (!end_aligned && (i + 1 < data->dma_nents)) {
			i++;
			dma_len = ib_sg_dma_len(ibdev, &sg[i]);
			total_sz += dma_len;
			last_addr = ib_sg_dma_address(ibdev, &sg[i]) + dma_len;
			end_aligned = !(last_addr  & ~MASK_4K);
		}

		/* handle the 1st page in the 1st DMA element */
		if (cur_page == 0) {
			page = first_addr & MASK_4K;
			page_vec->pages[cur_page] = page;
			cur_page++;
			page += SIZE_4K;
		} else
			page = first_addr;

		for (; page < last_addr; page += SIZE_4K) {
			page_vec->pages[cur_page] = page;
			cur_page++;
		}

	}
	page_vec->data_size = total_sz;
	iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
	return cur_page;
}
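
Every snippet in this listing leans on ib_sg_dma_address() and ib_sg_dma_len(). As a rough sketch (an assumption about typical devices of this kernel generation, not code taken from the listing), the two helpers reduce to the generic scatterlist accessors once ib_dma_map_sg() has mapped the list:

#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/* Illustrative only: approximate behaviour of the helpers used throughout. */
static inline u64 example_sg_dma_address(struct ib_device *dev,
					 struct scatterlist *sg)
{
	/* bus/DMA address of the mapped segment */
	return sg_dma_address(sg);
}

static inline unsigned int example_sg_dma_len(struct ib_device *dev,
					      struct scatterlist *sg)
{
	/* length in bytes of the mapped segment */
	return sg_dma_len(sg);
}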
Example #2
static int
iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
	     struct iser_mem_reg *reg)
{
	struct scatterlist *sg = mem->sg;

	reg->sge.lkey = device->pd->local_dma_lkey;
	/*
	 * FIXME: rework the registration code path to differentiate
	 * rkey/lkey use cases
	 */

	if (device->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
		reg->rkey = device->pd->unsafe_global_rkey;
	else
		reg->rkey = 0;
	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);

	iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
		 " length=0x%x\n", reg->sge.lkey, reg->rkey,
		 reg->sge.addr, reg->sge.length);

	return 0;
}
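
The rkey fallback above only makes sense when the protection domain was allocated with the unsafe global rkey. A minimal setup sketch (assumed context, not part of the listing, using the in-kernel ib_alloc_pd() flag):

#include <rdma/ib_verbs.h>

/* Hypothetical allocation: request the unsafe global rkey at PD creation. */
static struct ib_pd *example_alloc_pd(struct ib_device *ib_device)
{
	/* Without IB_PD_UNSAFE_GLOBAL_RKEY the pd->unsafe_global_rkey field
	 * is not populated, which is why iser_reg_dma() above falls back to
	 * rkey = 0 in that case. */
	return ib_alloc_pd(ib_device, IB_PD_UNSAFE_GLOBAL_RKEY);
}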
Example #3
static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
		enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	struct ib_rdma_wr *rdma_wr = &ctx->single.wr;

	ctx->nr_ops = 1;

	ctx->single.sge.lkey = qp->pd->local_dma_lkey;
	ctx->single.sge.addr = ib_sg_dma_address(dev, sg) + offset;
	ctx->single.sge.length = ib_sg_dma_len(dev, sg) - offset;

	memset(rdma_wr, 0, sizeof(*rdma_wr));
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	rdma_wr->wr.sg_list = &ctx->single.sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;

	ctx->type = RDMA_RW_SINGLE_WR;
	return 1;
}
Example #4
static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		struct scatterlist *sg, u32 sg_cnt, u32 offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	u32 max_sge = rdma_rw_max_sge(dev, dir);
	struct ib_sge *sge;
	u32 total_len = 0, i, j;

	ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);

	ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
	if (!ctx->map.sges)
		goto out;

	ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
	if (!ctx->map.wrs)
		goto out_free_sges;

	for (i = 0; i < ctx->nr_ops; i++) {
		struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
		u32 nr_sge = min(sg_cnt, max_sge);

		if (dir == DMA_TO_DEVICE)
			rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
		else
			rdma_wr->wr.opcode = IB_WR_RDMA_READ;
		rdma_wr->remote_addr = remote_addr + total_len;
		rdma_wr->rkey = rkey;
		rdma_wr->wr.sg_list = sge;

		for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
			rdma_wr->wr.num_sge++;

			sge->addr = ib_sg_dma_address(dev, sg) + offset;
			sge->length = ib_sg_dma_len(dev, sg) - offset;
			sge->lkey = qp->pd->local_dma_lkey;

			total_len += sge->length;
			sge++;
			sg_cnt--;
			offset = 0;
		}

		if (i + 1 < ctx->nr_ops)
			rdma_wr->wr.next = &ctx->map.wrs[i + 1].wr;
	}

	ctx->type = RDMA_RW_MULTI_WR;
	return ctx->nr_ops;

out_free_sges:
	kfree(ctx->map.sges);
out:
	return -ENOMEM;
}
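
Both rw.c helpers above (Examples #3 and #4) are normally reached through rdma_rw_ctx_init(), which maps the scatterlist and then picks the single-WR or multi-WR path. A hypothetical caller might look like this (a sketch against the in-kernel rdma_rw API; the wrapper itself is invented for illustration):

#include <rdma/rw.h>

/* Hypothetical ULP read: map the SG, build the WR chain, then post it. */
static int example_rdma_read(struct ib_qp *qp, u8 port_num,
			     struct rdma_rw_ctx *ctx, struct ib_cqe *cqe,
			     struct scatterlist *sg, u32 sg_cnt,
			     u64 remote_addr, u32 rkey)
{
	int ret;

	ret = rdma_rw_ctx_init(ctx, qp, port_num, sg, sg_cnt, 0,
			       remote_addr, rkey, DMA_FROM_DEVICE);
	if (ret < 0)
		return ret;

	/* rdma_rw_ctx_post() chains and posts the WRs built above. */
	ret = rdma_rw_ctx_post(ctx, qp, port_num, cqe, NULL);
	if (ret)
		rdma_rw_ctx_destroy(ctx, qp, port_num, sg, sg_cnt,
				    DMA_FROM_DEVICE);
	return ret;
}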
Example #5
static void iser_data_buf_dump(struct iser_data_buf *data,
			       struct ib_device *ibdev)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(data->sg, sg, data->dma_nents, i)
		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
			 "off:0x%x sz:0x%x dma_len:0x%x\n",
			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
			 sg_page(sg), sg->offset,
			 sg->length, ib_sg_dma_len(ibdev, sg));
}
Example #6
static void iser_data_buf_dump(struct iser_data_buf *data,
			       struct ib_device *ibdev)
{
	struct scatterlist *sg = (struct scatterlist *)data->buf;
	int i;

	for (i = 0; i < data->dma_nents; i++)
		iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
			 "off:0x%x sz:0x%x dma_len:0x%x\n",
			 i, (unsigned long)ib_sg_dma_address(ibdev, &sg[i]),
			 sg[i].page, sg[i].offset,
			 sg[i].length, ib_sg_dma_len(ibdev, &sg[i]));
}
Example #7
/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of entries which are aligned correctly. Supports
 * the case where consecutive SG elements are actually fragments of the same
 * physical page.
 */
static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
					      struct ib_device *ibdev)
{
	struct scatterlist *sg;
	u64 end_addr, next_addr;
	int i, cnt;
	unsigned int ret_len = 0;

	sg = (struct scatterlist *)data->buf;

	for (cnt = 0, i = 0; i < data->dma_nents; i++, cnt++) {
		/* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
		   "offset: %ld sz: %ld\n", i,
		   (unsigned long)page_to_phys(sg[i].page),
		   (unsigned long)sg[i].offset,
		   (unsigned long)sg[i].length); */
		end_addr = ib_sg_dma_address(ibdev, &sg[i]) +
			   ib_sg_dma_len(ibdev, &sg[i]);
		/* iser_dbg("Checking sg iobuf end address "
		       "0x%08lX\n", end_addr); */
		if (i + 1 < data->dma_nents) {
			next_addr = ib_sg_dma_address(ibdev, &sg[i+1]);
			/* are i, i+1 fragments of the same page? */
			if (end_addr == next_addr)
				continue;
			else if (!IS_4K_ALIGNED(end_addr)) {
				ret_len = cnt + 1;
				break;
			}
		}
	}
	if (i == data->dma_nents)
		ret_len = cnt;	/* loop ended */
	iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
		 ret_len, data->dma_nents, data);
	return ret_len;
}
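
A worked illustration of the check (hypothetical addresses, assuming 4K pages; not taken from the listing): an element that ends in the middle of a page truncates the aligned prefix.

/*
 * Hypothetical 3-element SG list after DMA mapping:
 *   sg[0]: dma_addr 0x20000000, dma_len 0x1000 -> end 0x20001000, 4K aligned
 *   sg[1]: dma_addr 0x30000000, dma_len 0x0600 -> end 0x30000600, NOT aligned
 *   sg[2]: dma_addr 0x40000000, dma_len 0x1000
 *
 * The loop passes sg[0], trips on sg[1]'s unaligned end and reports
 * ret_len = 2 out of dma_nents = 3.  The caller (see Example #10) then
 * bounces the whole buffer through iser_start_rdma_unaligned_sg().
 */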
Example #8
int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
		   struct scatterlist *sg, unsigned int nents)
{
	struct ib_device *dev = rds_ibdev->dev;
	struct rds_ib_fmr *fmr = &ibmr->u.fmr;
	struct scatterlist *scat = sg;
	u64 io_addr = 0;
	u64 *dma_pages;
	u32 len;
	int page_cnt, sg_dma_len;
	int i, j;
	int ret;

	sg_dma_len = ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
	if (unlikely(!sg_dma_len)) {
		pr_warn("RDS/IB: %s failed!\n", __func__);
		return -EBUSY;
	}

	len = 0;
	page_cnt = 0;

	for (i = 0; i < sg_dma_len; ++i) {
		unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
		u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);

		if (dma_addr & ~PAGE_MASK) {
			if (i > 0)
				return -EINVAL;
			else
				++page_cnt;
		}
		if ((dma_addr + dma_len) & ~PAGE_MASK) {
			if (i < sg_dma_len - 1)
				return -EINVAL;
			else
				++page_cnt;
		}

		len += dma_len;
	}

	page_cnt += len >> PAGE_SHIFT;
	if (page_cnt > ibmr->pool->fmr_attr.max_pages)
		return -EINVAL;

	dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
				 rdsibdev_to_node(rds_ibdev));
	if (!dma_pages)
		return -ENOMEM;

	page_cnt = 0;
	for (i = 0; i < sg_dma_len; ++i) {
		unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
		u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);

		for (j = 0; j < dma_len; j += PAGE_SIZE)
			dma_pages[page_cnt++] =
				(dma_addr & PAGE_MASK) + j;
	}

	ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr);
	if (ret)
		goto out;

	/* Success - we successfully remapped the MR, so we can
	 * safely tear down the old mapping.
	 */
	rds_ib_teardown_mr(ibmr);

	ibmr->sg = scat;
	ibmr->sg_len = nents;
	ibmr->sg_dma_len = sg_dma_len;
	ibmr->remap_count++;

	if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
		rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
	else
		rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
	ret = 0;

out:
	kfree(dma_pages);

	return ret;
}
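
For the page counting above, a small worked case may help (hypothetical addresses, assuming 4K pages; not taken from the listing):

/*
 * Single mapped segment: dma_addr = 0x10000800, dma_len = 0x1800
 * (starts mid-page, ends exactly on a page boundary at 0x10002000).
 *
 * First pass:  start unaligned and i == 0       -> page_cnt = 1
 *              end aligned                      -> no change
 *              len = 0x1800, len >> PAGE_SHIFT  -> page_cnt += 1, total 2
 *
 * Second pass: j = 0x0000 -> dma_pages[0] = 0x10000000
 *              j = 0x1000 -> dma_pages[1] = 0x10001000
 *
 * Two 4K pages are handed to ib_map_phys_fmr(), matching the two pages
 * the segment actually touches.
 */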
Example #9
static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
			   struct rds_ib_mr_pool *pool,
			   struct rds_ib_mr *ibmr,
			   struct scatterlist *sg, unsigned int sg_len)
{
	struct ib_device *dev = rds_ibdev->dev;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
	int i;
	u32 len;
	int ret = 0;

	/* We want to tear down the old ibmr values here and fill them in
	 * with the new sg values
	 */
	rds_ib_teardown_mr(ibmr);

	ibmr->sg = sg;
	ibmr->sg_len = sg_len;
	ibmr->sg_dma_len = 0;
	frmr->sg_byte_len = 0;
	WARN_ON(ibmr->sg_dma_len);
	ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
					 DMA_BIDIRECTIONAL);
	if (unlikely(!ibmr->sg_dma_len)) {
		pr_warn("RDS/IB: %s failed!\n", __func__);
		return -EBUSY;
	}

	frmr->sg_byte_len = 0;
	frmr->dma_npages = 0;
	len = 0;

	ret = -EINVAL;
	for (i = 0; i < ibmr->sg_dma_len; ++i) {
		unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
		u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);

		frmr->sg_byte_len += dma_len;
		if (dma_addr & ~PAGE_MASK) {
			if (i > 0)
				goto out_unmap;
			else
				++frmr->dma_npages;
		}

		if ((dma_addr + dma_len) & ~PAGE_MASK) {
			if (i < ibmr->sg_dma_len - 1)
				goto out_unmap;
			else
				++frmr->dma_npages;
		}

		len += dma_len;
	}
	frmr->dma_npages += len >> PAGE_SHIFT;

	if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) {
		ret = -EMSGSIZE;
		goto out_unmap;
	}

	ret = rds_ib_post_reg_frmr(ibmr);
	if (ret)
		goto out_unmap;

	if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
		rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
	else
		rds_ib_stats_inc(s_ib_rdma_mr_1m_used);

	return ret;

out_unmap:
	ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
			DMA_BIDIRECTIONAL);
	ibmr->sg_dma_len = 0;
	return ret;
}
Example #10
/**
 * iser_reg_rdma_mem - Registers memory intended for RDMA,
 * obtaining the rkey and va
 *
 * Returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
		      enum   iser_data_dir        cmd_dir)
{
	struct iser_conn     *ib_conn = iser_ctask->iser_conn->ib_conn;
	struct iser_device   *device = ib_conn->device;
	struct ib_device     *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_ctask->data[cmd_dir];
	struct iser_regd_buf *regd_buf;
	int aligned_len;
	int err;
	int i;
	struct scatterlist *sg;

	regd_buf = &iser_ctask->rdma_regd[cmd_dir];

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		iser_err("rdma alignment violation %d/%d aligned\n",
			 aligned_len, mem->size);
		iser_data_buf_dump(mem, ibdev);

		/* unmap the command data before accessing it */
		iser_dma_unmap_task_data(iser_ctask);

		/* allocate a copy buffer; if we are writing, copy the
		 * unaligned scatterlist and dma map the copy */
		if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0)
			return -ENOMEM;
		mem = &iser_ctask->data_copy[cmd_dir];
	}

	/* if there is a single dma entry, FMR is not needed */
	if (mem->dma_nents == 1) {
		sg = (struct scatterlist *)mem->buf;

		regd_buf->reg.lkey = device->mr->lkey;
		regd_buf->reg.rkey = device->mr->rkey;
		regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
		regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
		regd_buf->reg.is_fmr = 0;

		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
			 "va: 0x%08lX sz: %ld]\n",
			 (unsigned int)regd_buf->reg.lkey,
			 (unsigned int)regd_buf->reg.rkey,
			 (unsigned long)regd_buf->reg.va,
			 (unsigned long)regd_buf->reg.len);
	} else { /* use FMR for multiple dma entries */
		iser_page_vec_build(mem, ib_conn->page_vec, ibdev);
		err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
		if (err) {
			iser_data_buf_dump(mem, ibdev);
			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents,
				 ntoh24(iser_ctask->desc.iscsi_header.dlength));
			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
				 ib_conn->page_vec->data_size, ib_conn->page_vec->length,
				 ib_conn->page_vec->offset);
			for (i = 0; i < ib_conn->page_vec->length; i++)
				iser_err("page_vec[%d] = 0x%llx\n", i,
					 (unsigned long long) ib_conn->page_vec->pages[i]);
			return err;
		}
	}

	/* take a reference on this regd buf such that it will not be released *
	 * (eg in send dto completion) before we get the scsi response         */
	atomic_inc(&regd_buf->ref_count);
	return 0;
}
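
A short trace of the two branches above (hypothetical command, not from the listing):

/*
 * - Single DMA entry: the branch above publishes the device MR's lkey/rkey
 *   and the entry's bus address and length directly (reg.is_fmr = 0); no
 *   registration work is posted.
 * - Multiple DMA entries: iser_page_vec_build() fills ib_conn->page_vec
 *   (via iser_sg_to_page_vec(), Example #1) and iser_reg_page_vec()
 *   registers it through the FMR pool, so the remote side still sees a
 *   single rkey/va pair covering the whole buffer.
 */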