Beispiel #1
0
/**
 * iser_send_command - send command PDU
 *
 * Builds the iSER TX descriptor for a SCSI command PDU, registers the
 * command data for RDMA according to direction, replenishes one receive
 * buffer for the expected response and posts the send.
 *
 * Returns 0 on success or a negative errno; on any failure after the
 * DTO was built, its registered buffers are released.
 */
int iser_send_command(struct iscsi_conn *conn,
		      struct iscsi_task *task)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_dto *send_dto = NULL;
	unsigned long edtl;
	int err = 0;
	struct iser_data_buf *data_buf;
	struct iscsi_cmd *hdr =  (struct iscsi_cmd *)task->hdr;
	struct scsi_cmnd *sc  =  task->sc;

	/* sending is only legal while the IB connection is fully up */
	if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
		iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
		return -EPERM;
	}
	if (iser_check_xmit(conn, task))
		return -ENOBUFS;

	/* expected data transfer length, from the iSCSI command header */
	edtl = ntohl(hdr->data_length);

	/* build the tx desc regd header and add it to the tx desc dto */
	iser_task->desc.type = ISCSI_TX_SCSI_COMMAND;
	send_dto = &iser_task->desc.dto;
	send_dto->task = iser_task;
	iser_create_send_desc(iser_conn, &iser_task->desc);

	/* pick the data buffer matching the command's direction */
	if (hdr->flags & ISCSI_FLAG_CMD_READ)
		data_buf = &iser_task->data[ISER_DIR_IN];
	else
		data_buf = &iser_task->data[ISER_DIR_OUT];

	if (scsi_sg_count(sc)) { /* using a scatter list */
		data_buf->buf  = scsi_sglist(sc);
		data_buf->size = scsi_sg_count(sc);
	}

	data_buf->data_len = scsi_bufflen(sc);

	/* register data for RDMA per direction; a command may take
	 * either branch depending on its READ/WRITE flags */
	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
		err = iser_prepare_read_cmd(task, edtl);
		if (err)
			goto send_command_error;
	}
	if (hdr->flags & ISCSI_FLAG_CMD_WRITE) {
		err = iser_prepare_write_cmd(task,
					     task->imm_count,
				             task->imm_count +
					     task->unsol_r2t.data_length,
					     edtl);
		if (err)
			goto send_command_error;
	}

	/* DMA-map the PDU header buffer for the send */
	iser_reg_single(iser_conn->ib_conn->device,
			send_dto->regd[0], DMA_TO_DEVICE);

	/* post a receive buffer for the response before sending */
	if (iser_post_receive_control(conn) != 0) {
		iser_err("post_recv failed!\n");
		err = -ENOMEM;
		goto send_command_error;
	}

	iser_task->status = ISER_TASK_STATUS_STARTED;

	err = iser_post_send(&iser_task->desc);
	if (!err)
		return 0;

send_command_error:
	iser_dto_buffs_release(send_dto);
	iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err);
	return err;
}
Beispiel #2
0
/**
 * iser_send_data_out - send data out PDU
 *
 * Sends a Data-Out PDU (solicited by an R2T, or unsolicited) for the
 * given task. A fresh TX descriptor is allocated per Data-Out PDU,
 * unlike command PDUs whose descriptor is embedded in the task.
 *
 * Returns 0 on success or a negative errno; on failure the descriptor
 * and its registered buffers are released here.
 */
int iser_send_data_out(struct iscsi_conn *conn,
		       struct iscsi_task *task,
		       struct iscsi_data *hdr)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_desc *tx_desc = NULL;
	struct iser_dto *send_dto = NULL;
	unsigned long buf_offset;
	unsigned long data_seg_len;
	uint32_t itt;
	int err = 0;

	/* sending is only legal while the IB connection is fully up */
	if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
		iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
		return -EPERM;
	}

	if (iser_check_xmit(conn, task))
		return -ENOBUFS;

	/* pull offset/length of this data segment out of the PDU header */
	itt = (__force uint32_t)hdr->itt;
	data_seg_len = ntoh24(hdr->dlength);
	buf_offset   = ntohl(hdr->offset);

	iser_dbg("%s itt %d dseg_len %d offset %d\n",
		 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);

	tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
	if (tx_desc == NULL) {
		iser_err("Failed to alloc desc for post dataout\n");
		return -ENOMEM;
	}

	tx_desc->type = ISCSI_TX_DATAOUT;
	memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));

	/* build the tx desc regd header and add it to the tx desc dto */
	send_dto = &tx_desc->dto;
	send_dto->task = iser_task;
	iser_create_send_desc(iser_conn, tx_desc);

	/* DMA-map the PDU header buffer for the send */
	iser_reg_single(iser_conn->ib_conn->device,
			send_dto->regd[0], DMA_TO_DEVICE);

	/* all data was registered for RDMA, we can use the lkey */
	iser_dto_add_regd_buff(send_dto,
			       &iser_task->rdma_regd[ISER_DIR_OUT],
			       buf_offset,
			       data_seg_len);

	/* sanity: the segment must fit inside the task's OUT buffer */
	if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
		iser_err("Offset:%ld & DSL:%ld in Data-Out "
			 "inconsistent with total len:%ld, itt:%d\n",
			 buf_offset, data_seg_len,
			 iser_task->data[ISER_DIR_OUT].data_len, itt);
		err = -EINVAL;
		goto send_data_out_error;
	}
	iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
		 itt, buf_offset, data_seg_len);


	err = iser_post_send(tx_desc);
	if (!err)
		return 0;

send_data_out_error:
	iser_dto_buffs_release(send_dto);
	kmem_cache_free(ig.desc_cache, tx_desc);
	iser_err("conn %p failed err %d\n",conn, err);
	return err;
}
Beispiel #3
0
/* QP asynchronous event callback: only logs the event, no recovery is
 * attempted here. context is the ib_conn supplied at QP creation
 * (unused). */
static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
    iser_err("got qp event %d\n",cause->event);
}
Beispiel #4
0
/**
 * iser_post_receive_control - allocates, initializes and posts receive DTO.
 *
 * Posts one receive buffer for the expected control response, plus one
 * replacement buffer for every receive consumed by an unexpected PDU
 * since the last call (tracked in ib_conn->unexpected_pdu_count).
 *
 * Returns 0 when all posts succeeded. If at least the first (response)
 * buffer was posted, later failures are swallowed (err = 0) and the
 * not-yet-replaced count is returned to the atomic counter so a future
 * send retries the replacement.
 */
static int iser_post_receive_control(struct iscsi_conn *conn)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
	struct iser_desc     *rx_desc;
	struct iser_regd_buf *regd_hdr;
	struct iser_regd_buf *regd_data;
	struct iser_dto      *recv_dto = NULL;
	struct iser_device  *device = iser_conn->ib_conn->device;
	int rx_data_size, err;
	int posts, outstanding_unexp_pdus;

	/* for the login sequence we must support rx of upto 8K; login is done
	 * after conn create/bind (connect) and conn stop/bind (reconnect),
	 * what's common for both schemes is that the connection is not started
	 */
	if (conn->c_stage != ISCSI_CONN_STARTED)
		rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
	else /* FIXME till user space sets conn->max_recv_dlength correctly */
		rx_data_size = 128;

	/* claim all unexpected-pdu debts atomically; re-added on failure */
	outstanding_unexp_pdus =
		atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0);

	/*
	 * in addition to the response buffer, replace those consumed by
	 * unexpected pdus.
	 */
	for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) {
		rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
		if (rx_desc == NULL) {
			iser_err("Failed to alloc desc for post recv %d\n",
				 posts);
			err = -ENOMEM;
			goto post_rx_cache_alloc_failure;
		}
		rx_desc->type = ISCSI_RX;
		rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
		if (rx_desc->data == NULL) {
			iser_err("Failed to alloc data buf for post recv %d\n",
				 posts);
			err = -ENOMEM;
			goto post_rx_kmalloc_failure;
		}

		recv_dto = &rx_desc->dto;
		recv_dto->ib_conn = iser_conn->ib_conn;
		recv_dto->regd_vector_len = 0;

		/* register the descriptor's header area for DMA */
		regd_hdr = &rx_desc->hdr_regd_buf;
		memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
		regd_hdr->device  = device;
		regd_hdr->virt_addr  = rx_desc; /* == &rx_desc->iser_header */
		regd_hdr->data_size  = ISER_TOTAL_HEADERS_LEN;

		iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);

		iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);

		/* register the data buffer for DMA */
		regd_data = &rx_desc->data_regd_buf;
		memset(regd_data, 0, sizeof(struct iser_regd_buf));
		regd_data->device  = device;
		regd_data->virt_addr  = rx_desc->data;
		regd_data->data_size  = rx_data_size;

		iser_reg_single(device, regd_data, DMA_FROM_DEVICE);

		iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);

		err = iser_post_recv(rx_desc);
		if (err) {
			iser_err("Failed iser_post_recv for post %d\n", posts);
			goto post_rx_post_recv_failure;
		}
	}
	/* all posts successful */
	return 0;

post_rx_post_recv_failure:
	iser_dto_buffs_release(recv_dto);
	kfree(rx_desc->data);
post_rx_kmalloc_failure:
	kmem_cache_free(ig.desc_cache, rx_desc);
post_rx_cache_alloc_failure:
	if (posts > 0) {
		/*
		 * response buffer posted, but did not replace all unexpected
		 * pdu recv bufs. Ignore error, retry occurs next send
		 */
		outstanding_unexp_pdus -= (posts - 1);
		err = 0;
	}
	/* give back whatever we could not replace */
	atomic_add(outstanding_unexp_pdus,
		   &iser_conn->ib_conn->unexpected_pdu_count);

	return err;
}
Beispiel #5
0
/**
 * iscsi_iser_session_create() - create an iscsi-iser session
 * @ep:             iscsi end-point handle
 * @cmds_max:       maximum commands in this session
 * @qdepth:         session command queue depth
 * @initial_cmdsn:  initiator command sequnce number
 *
 * Allocates and adds a scsi host, expose DIF supprot if
 * exists, and sets up an iscsi session.
 */
static struct iscsi_cls_session *
iscsi_iser_session_create(struct iscsi_endpoint *ep,
			  uint16_t cmds_max, uint16_t qdepth,
			  uint32_t initial_cmdsn)
{
	struct iscsi_cls_session *cls_session;
	struct iscsi_session *session;
	struct Scsi_Host *shost;
	struct iser_conn *iser_conn = NULL;
	struct ib_conn *ib_conn;
	u16 max_cmds;

	shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
	if (!shost)
		return NULL;
	shost->transportt = iscsi_iser_scsi_transport;
	shost->cmd_per_lun = qdepth;
	shost->max_lun = iscsi_max_lun;
	shost->max_id = 0;
	shost->max_channel = 0;
	shost->max_cmd_len = 16;

	/*
	 * older userspace tools (before 2.0-870) did not pass us
	 * the leading conn's ep so this will be NULL;
	 */
	if (ep) {
		iser_conn = ep->dd_data;
		max_cmds = iser_conn->max_cmds;
		shost->sg_tablesize = iser_conn->scsi_sg_tablesize;
		shost->max_sectors = iser_conn->scsi_max_sectors;

		mutex_lock(&iser_conn->state_mutex);
		if (iser_conn->state != ISER_CONN_UP) {
			iser_err("iser conn %p already started teardown\n",
				 iser_conn);
			mutex_unlock(&iser_conn->state_mutex);
			goto free_host;
		}

		ib_conn = &iser_conn->ib_conn;
		if (ib_conn->pi_support) {
			u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;

			scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
			scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
						   SHOST_DIX_GUARD_CRC);
		}

		/*
		 * Limit the sg_tablesize and max_sectors based on the device
		 * max fastreg page list length.
		 */
		shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
			ib_conn->device->dev_attr.max_fast_reg_page_list_len);
		shost->max_sectors = min_t(unsigned int,
			1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);

		if (iscsi_host_add(shost,
				   ib_conn->device->ib_device->dma_device)) {
			mutex_unlock(&iser_conn->state_mutex);
			goto free_host;
		}
		mutex_unlock(&iser_conn->state_mutex);
	} else {
Beispiel #6
0
/**
 * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
    struct iser_device    *device;
    struct ib_qp_init_attr    init_attr;
    int            ret;
    struct ib_fmr_pool_param params;

    BUG_ON(ib_conn->device == NULL);

    device = ib_conn->device;

    ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
                    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
                    GFP_KERNEL);
    if (!ib_conn->page_vec) {
        ret = -ENOMEM;
        goto alloc_err;
    }
    ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);

    params.page_shift        = SHIFT_4K;
    /* when the first/last SG element are not start/end *
     * page aligned, the map whould be of N+1 pages     */
    params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
    /* make the pool size twice the max number of SCSI commands *
     * the ML is expected to queue, watermark for unmap at 50%  */
    params.pool_size     = ISCSI_DEF_XMIT_CMDS_MAX * 2;
    params.dirty_watermark     = ISCSI_DEF_XMIT_CMDS_MAX;
    params.cache         = 0;
    params.flush_function     = NULL;
    params.access         = (IB_ACCESS_LOCAL_WRITE  |
                    IB_ACCESS_REMOTE_WRITE |
                    IB_ACCESS_REMOTE_READ);

    ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
    if (IS_ERR(ib_conn->fmr_pool)) {
        ret = PTR_ERR(ib_conn->fmr_pool);
        goto fmr_pool_err;
    }

    memset(&init_attr, 0, sizeof init_attr);

    init_attr.event_handler = iser_qp_event_callback;
    init_attr.qp_context    = (void *)ib_conn;
    init_attr.send_cq    = device->cq;
    init_attr.recv_cq    = device->cq;
    init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
    init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
    init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN;
    init_attr.cap.max_recv_sge = 2;
    init_attr.sq_sig_type    = IB_SIGNAL_REQ_WR;
    init_attr.qp_type    = IB_QPT_RC;

    ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
    if (ret)
        goto qp_err;

    ib_conn->qp = ib_conn->cma_id->qp;
    iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
         ib_conn, ib_conn->cma_id,
         ib_conn->fmr_pool, ib_conn->cma_id->qp);
    return ret;

qp_err:
    (void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
fmr_pool_err:
    kfree(ib_conn->page_vec);
alloc_err:
    iser_err("unable to alloc mem or create resource, err %d\n", ret);
    return ret;
}
Beispiel #7
0
/**
 * iser_reg_rdma_mem - Registers memory intended for RDMA,
 * obtaining rkey and va
 *
 * If the task's scatterlist is not page-aligned for RDMA, it is first
 * unmapped and replaced with an aligned bounce buffer (data_copy).
 * Single-entry mappings use the device MR's lkey/rkey directly;
 * multi-entry mappings go through the connection's FMR pool.
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
		      enum   iser_data_dir        cmd_dir)
{
	struct iser_conn     *ib_conn = iser_ctask->iser_conn->ib_conn;
	struct iser_device   *device = ib_conn->device;
	struct ib_device     *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_ctask->data[cmd_dir];
	struct iser_regd_buf *regd_buf;
	int aligned_len;
	int err;
	int i;
	struct scatterlist *sg;

	regd_buf = &iser_ctask->rdma_regd[cmd_dir];

	/* NOTE(review): the check compares against mem->dma_nents but the
	 * log prints mem->size — verify which count was intended */
	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		iser_err("rdma alignment violation %d/%d aligned\n",
			 aligned_len, mem->size);
		iser_data_buf_dump(mem, ibdev);

		/* unmap the command data before accessing it */
		iser_dma_unmap_task_data(iser_ctask);

		/* allocate copy buf, if we are writing, copy the */
		/* unaligned scatterlist, dma map the copy        */
		if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0)
				return -ENOMEM;
		/* from here on, register the aligned bounce buffer */
		mem = &iser_ctask->data_copy[cmd_dir];
	}

	/* if there a single dma entry, FMR is not needed */
	if (mem->dma_nents == 1) {
		sg = (struct scatterlist *)mem->buf;

		regd_buf->reg.lkey = device->mr->lkey;
		regd_buf->reg.rkey = device->mr->rkey;
		regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
		regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
		regd_buf->reg.is_fmr = 0;

		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
			 "va: 0x%08lX sz: %ld]\n",
			 (unsigned int)regd_buf->reg.lkey,
			 (unsigned int)regd_buf->reg.rkey,
			 (unsigned long)regd_buf->reg.va,
			 (unsigned long)regd_buf->reg.len);
	} else { /* use FMR for multiple dma entries */
		iser_page_vec_build(mem, ib_conn->page_vec, ibdev);
		err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
		if (err) {
			/* dump full mapping state to aid FMR failure triage */
			iser_data_buf_dump(mem, ibdev);
			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents,
				 ntoh24(iser_ctask->desc.iscsi_header.dlength));
			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
				 ib_conn->page_vec->data_size, ib_conn->page_vec->length,
				 ib_conn->page_vec->offset);
			for (i=0 ; i<ib_conn->page_vec->length ; i++)
				iser_err("page_vec[%d] = 0x%llx\n", i,
					 (unsigned long long) ib_conn->page_vec->pages[i]);
			return err;
		}
	}

	/* take a reference on this regd buf such that it will not be released *
	 * (eg in send dto completion) before we get the scsi response         */
	atomic_inc(&regd_buf->ref_count);
	return 0;
}
Beispiel #8
0
/**
 * iser_post_receive_control - allocates, initializes and posts receive DTO.
 *
 * Allocates one RX descriptor and its data buffer, DMA-registers the
 * header and data areas, and posts the receive. Returns 0 on success
 * or a negative errno; all resources are freed on failure.
 */
static int iser_post_receive_control(struct iscsi_conn *conn)
{
	struct iscsi_iser_conn *iser_conn = conn->dd_data;
	struct iser_desc     *rx_desc;
	struct iser_regd_buf *regd_hdr;
	struct iser_regd_buf *regd_data;
	struct iser_dto      *recv_dto = NULL;
	struct iser_device  *device = iser_conn->ib_conn->device;
	int rx_data_size, err = 0;

	rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
	if (rx_desc == NULL) {
		iser_err("Failed to alloc desc for post recv\n");
		return -ENOMEM;
	}
	rx_desc->type = ISCSI_RX;

	/* for the login sequence we must support rx of upto 8K; login is done
	 * after conn create/bind (connect) and conn stop/bind (reconnect),
	 * what's common for both schemes is that the connection is not started
	 */
	if (conn->c_stage != ISCSI_CONN_STARTED)
		rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
	else /* FIXME till user space sets conn->max_recv_dlength correctly */
		rx_data_size = 128;

	rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
	if (rx_desc->data == NULL) {
		iser_err("Failed to alloc data buf for post recv\n");
		err = -ENOMEM;
		goto post_rx_kmalloc_failure;
	}

	recv_dto = &rx_desc->dto;
	recv_dto->ib_conn = iser_conn->ib_conn;
	recv_dto->regd_vector_len = 0;

	/* register the descriptor's header area for DMA */
	regd_hdr = &rx_desc->hdr_regd_buf;
	memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
	regd_hdr->device  = device;
	regd_hdr->virt_addr  = rx_desc; /* == &rx_desc->iser_header */
	regd_hdr->data_size  = ISER_TOTAL_HEADERS_LEN;

	iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);

	iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);

	/* register the data buffer for DMA */
	regd_data = &rx_desc->data_regd_buf;
	memset(regd_data, 0, sizeof(struct iser_regd_buf));
	regd_data->device  = device;
	regd_data->virt_addr  = rx_desc->data;
	regd_data->data_size  = rx_data_size;

	iser_reg_single(device, regd_data, DMA_FROM_DEVICE);

	iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);

	err = iser_post_recv(rx_desc);
	if (!err)
		return 0;

	/* iser_post_recv failed */
	iser_dto_buffs_release(recv_dto);
	kfree(rx_desc->data);
post_rx_kmalloc_failure:
	kmem_cache_free(ig.desc_cache, rx_desc);
	return err;
}
Beispiel #9
0
/* Asynchronous IB device/port event handler: only logs the event,
 * no recovery action is taken here. */
static void iser_event_handler(struct ib_event_handler *handler,
				struct ib_event *event)
{
	iser_err("async event %d on device %s port %d\n", event->event,
		event->device->name, event->element.port_num);
}
/* Register user buffer memory and initialize passive rdma
 *  dto descriptor. Data size is stored in
 *  task->data[ISER_DIR_OUT].data_len, Protection size
 *  is stored at task->prot[ISER_DIR_OUT].data_len
 *
 * @imm_sz:   immediate data size carried inside the command PDU
 * @unsol_sz: imm_sz plus the unsolicited R2T data length
 * @edtl:     expected data transfer length from the iSCSI header
 *
 * When unsol_sz < edtl the target will solicit the remainder via RDMA
 * read, so the write stag/va are advertised in the iSER header.
 * Returns 0 on success, negative errno on failure.
 */
static int
iser_prepare_write_cmd(struct iscsi_task *task,
		       unsigned int imm_sz,
		       unsigned int unsol_sz,
		       unsigned int edtl)
{
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_device  *device = iser_task->iser_conn->ib_conn.device;
	struct iser_regd_buf *regd_buf;
	int err;
	struct iser_hdr *hdr = &iser_task->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
	struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];

	err = iser_dma_map_task_data(iser_task,
				     buf_out,
				     ISER_DIR_OUT,
				     DMA_TO_DEVICE);
	if (err)
		return err;

	/* map the protection (DIF) buffer too when the command has one */
	if (scsi_prot_sg_count(iser_task->sc)) {
		struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT];

		err = iser_dma_map_task_data(iser_task,
					     pbuf_out,
					     ISER_DIR_OUT,
					     DMA_TO_DEVICE);
		if (err)
			return err;
	}

	/* device-specific registration (FMR or fastreg) of the OUT buffer */
	err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
	if (err != 0) {
		iser_err("Failed to register write cmd RDMA mem\n");
		return err;
	}

	regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];

	/* advertise write stag/va only for the solicited part */
	if (unsol_sz < edtl) {
		hdr->flags     |= ISER_WSV;
		hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
		hdr->write_va   = cpu_to_be64(regd_buf->reg.va + unsol_sz);

		iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
			 "VA:%#llX + unsol:%d\n",
			 task->itt, regd_buf->reg.rkey,
			 (unsigned long long)regd_buf->reg.va, unsol_sz);
	}

	/* immediate data rides in the second SGE of the send */
	if (imm_sz > 0) {
		iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
			 task->itt, imm_sz);
		tx_dsg->addr   = regd_buf->reg.va;
		tx_dsg->length = imm_sz;
		tx_dsg->lkey   = regd_buf->reg.lkey;
		iser_task->desc.num_sge = 2;
	}

	return 0;
}
/**
 * iser_send_data_out - send data out PDU
 *
 * Newer variant: the data segment is described by a second SGE that
 * points into the already-registered OUT buffer (rdma_regd), so no
 * per-PDU registration is needed. Returns 0 or a negative errno; the
 * descriptor is freed here on failure.
 */
int iser_send_data_out(struct iscsi_conn *conn,
		       struct iscsi_task *task,
		       struct iscsi_data *hdr)
{
	struct iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	struct iser_tx_desc *tx_desc = NULL;
	struct iser_regd_buf *regd_buf;
	unsigned long buf_offset;
	unsigned long data_seg_len;
	uint32_t itt;
	int err = 0;
	struct ib_sge *tx_dsg;

	/* pull offset/length of this data segment out of the PDU header */
	itt = (__force uint32_t)hdr->itt;
	data_seg_len = ntoh24(hdr->dlength);
	buf_offset   = ntohl(hdr->offset);

	iser_dbg("%s itt %d dseg_len %d offset %d\n",
		 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);

	tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
	if (tx_desc == NULL) {
		iser_err("Failed to alloc desc for post dataout\n");
		return -ENOMEM;
	}

	tx_desc->type = ISCSI_TX_DATAOUT;
	tx_desc->iser_header.flags = ISER_VER;
	memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));

	/* build the tx desc */
	iser_initialize_task_headers(task, tx_desc);

	/* second SGE points at the segment within the registered buffer */
	regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
	tx_dsg = &tx_desc->tx_sg[1];
	tx_dsg->addr    = regd_buf->reg.va + buf_offset;
	tx_dsg->length  = data_seg_len;
	tx_dsg->lkey    = regd_buf->reg.lkey;
	tx_desc->num_sge = 2;

	/* sanity: the segment must fit inside the task's OUT buffer */
	if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
		iser_err("Offset:%ld & DSL:%ld in Data-Out "
			 "inconsistent with total len:%ld, itt:%d\n",
			 buf_offset, data_seg_len,
			 iser_task->data[ISER_DIR_OUT].data_len, itt);
		err = -EINVAL;
		goto send_data_out_error;
	}
	iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
		 itt, buf_offset, data_seg_len);


	err = iser_post_send(&iser_conn->ib_conn, tx_desc, true);
	if (!err)
		return 0;

send_data_out_error:
	kmem_cache_free(ig.desc_cache, tx_desc);
	iser_err("conn %p failed err %d\n",conn, err);
	return err;
}
/**
 * iser_send_command - send command PDU
 *
 * Newer variant: builds the task's embedded TX descriptor, registers
 * data (and DIF protection data, when present) for RDMA per direction,
 * and posts the send with selective completion signaling.
 * Returns 0 on success or a negative errno. Mapped buffers are not
 * unwound here on failure — presumably released in task cleanup;
 * verify against the caller (NOTE(review)).
 */
int iser_send_command(struct iscsi_conn *conn,
		      struct iscsi_task *task)
{
	struct iser_conn *iser_conn = conn->dd_data;
	struct iscsi_iser_task *iser_task = task->dd_data;
	unsigned long edtl;
	int err;
	struct iser_data_buf *data_buf, *prot_buf;
	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
	struct scsi_cmnd *sc  =  task->sc;
	struct iser_tx_desc *tx_desc = &iser_task->desc;
	/* per-connection counter used to signal only every Nth send */
	u8 sig_count = ++iser_conn->ib_conn.sig_count;

	/* expected data transfer length, from the iSCSI command header */
	edtl = ntohl(hdr->data_length);

	/* build the tx desc regd header and add it to the tx desc dto */
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn, tx_desc);

	/* pick data + protection buffers matching the command direction */
	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
		data_buf = &iser_task->data[ISER_DIR_IN];
		prot_buf = &iser_task->prot[ISER_DIR_IN];
	} else {
		data_buf = &iser_task->data[ISER_DIR_OUT];
		prot_buf = &iser_task->prot[ISER_DIR_OUT];
	}

	if (scsi_sg_count(sc)) { /* using a scatter list */
		data_buf->buf  = scsi_sglist(sc);
		data_buf->size = scsi_sg_count(sc);
	}
	data_buf->data_len = scsi_bufflen(sc);

	/* DIF: 8 bytes of protection information per logical sector */
	if (scsi_prot_sg_count(sc)) {
		prot_buf->buf  = scsi_prot_sglist(sc);
		prot_buf->size = scsi_prot_sg_count(sc);
		prot_buf->data_len = (data_buf->data_len >>
				     ilog2(sc->device->sector_size)) * 8;
	}

	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
		err = iser_prepare_read_cmd(task);
		if (err)
			goto send_command_error;
	}
	if (hdr->flags & ISCSI_FLAG_CMD_WRITE) {
		err = iser_prepare_write_cmd(task,
					     task->imm_count,
				             task->imm_count +
					     task->unsol_r2t.data_length,
					     edtl);
		if (err)
			goto send_command_error;
	}

	iser_task->status = ISER_TASK_STATUS_STARTED;

	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
			     iser_signal_comp(sig_count));
	if (!err)
		return 0;

send_command_error:
	iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err);
	return err;
}
/* Creates the connection's login buffers (with their DMA mappings),
 * page vector, FMR pool and RC QP. Returns 0 on success or a negative
 * errno. On failure only a message is logged here — partially created
 * resources are presumably released by the connection teardown path;
 * verify against the caller (NOTE(review)). */
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
	struct iser_device	*device;
	struct ib_qp_init_attr	init_attr;
	int			req_err, resp_err, ret = -ENOMEM;
	struct ib_fmr_pool_param params;

	BUG_ON(ib_conn->device == NULL);

	device = ib_conn->device;

	/* one allocation holds both login request and response buffers */
	ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
					ISER_RX_LOGIN_SIZE, GFP_KERNEL);
	if (!ib_conn->login_buf)
		goto out_err;

	ib_conn->login_req_buf  = ib_conn->login_buf;
	ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;

	ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
				(void *)ib_conn->login_req_buf,
				ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

	ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
				(void *)ib_conn->login_resp_buf,
				ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

	req_err  = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);

	/* zero the handles that failed so teardown can tell them apart */
	if (req_err || resp_err) {
		if (req_err)
			ib_conn->login_req_dma = 0;
		if (resp_err)
			ib_conn->login_resp_dma = 0;
		goto out_err;
	}

	ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
				    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
				    GFP_KERNEL);
	if (!ib_conn->page_vec)
		goto out_err;

	ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);

	params.page_shift        = SHIFT_4K;
	/* when the first/last SG element are not start/end *
	 * page aligned, the map would be of N+1 pages      */
	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
	/* make the pool size twice the max number of SCSI commands *
	 * the ML is expected to queue, watermark for unmap at 50%  */
	params.pool_size	 = ISCSI_DEF_XMIT_CMDS_MAX * 2;
	params.dirty_watermark	 = ISCSI_DEF_XMIT_CMDS_MAX;
	params.cache		 = 0;
	params.flush_function	 = NULL;
	params.access		 = (IB_ACCESS_LOCAL_WRITE  |
				    IB_ACCESS_REMOTE_WRITE |
				    IB_ACCESS_REMOTE_READ);

	ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
	if (IS_ERR(ib_conn->fmr_pool)) {
		ret = PTR_ERR(ib_conn->fmr_pool);
		ib_conn->fmr_pool = NULL;
		goto out_err;
	}

	memset(&init_attr, 0, sizeof init_attr);

	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context	= (void *)ib_conn;
	init_attr.send_cq	= device->tx_cq;
	init_attr.recv_cq	= device->rx_cq;
	init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type	= IB_SIGNAL_REQ_WR;
	init_attr.qp_type	= IB_QPT_RC;

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
		 ib_conn, ib_conn->cma_id,
		 ib_conn->fmr_pool, ib_conn->cma_id->qp);
	return ret;

out_err:
	iser_err("unable to alloc mem or create resource, err %d\n", ret);
	return ret;
}