int
spdk_nvmf_request_prep_data(struct spdk_nvmf_request *req,
			    void *in_cap_data, uint32_t in_cap_len,
			    void *bb, uint32_t bb_len)
{
	struct spdk_nvmf_conn *conn = req->conn;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	enum spdk_nvme_data_transfer xfer;
	int ret;

	nvmf_trace_command(req->cmd, conn->type);

	req->length = 0;
	req->xfer = SPDK_NVME_DATA_NONE;
	req->data = NULL;

	if (cmd->opc == SPDK_NVME_OPC_FABRIC) {
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd->nvmf_cmd.fctype);
	} else {
		xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
	}

	if (xfer != SPDK_NVME_DATA_NONE) {
		struct spdk_nvme_sgl_descriptor *sgl = (struct spdk_nvme_sgl_descriptor *)&cmd->dptr.sgl1;

		if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
		    (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_ADDRESS ||
		     sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) {
			if (sgl->keyed.length > bb_len) {
				SPDK_ERRLOG("SGL length 0x%x exceeds BB length 0x%x\n",
					    sgl->keyed.length, bb_len);
				rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
				return -1;
			}

			req->data = bb;
			req->length = sgl->keyed.length;
		} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
			   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
			uint64_t offset = sgl->address;
			uint32_t max_len = in_cap_len;

			SPDK_TRACELOG(SPDK_TRACE_NVMF, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
				      offset, sgl->unkeyed.length);

			if (conn->type == CONN_TYPE_AQ) {
				SPDK_ERRLOG("In-capsule data not allowed for admin queue\n");
				return -1;
			}

			if (offset > max_len) {
				SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
					    offset, max_len);
				rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
				return -1;
			}
			max_len -= (uint32_t)offset;

			if (sgl->unkeyed.length > max_len) {
				SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
					    sgl->unkeyed.length, max_len);
				rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
				return -1;
			}

			req->data = in_cap_data + offset;
			req->length = sgl->unkeyed.length;
		} else {
			SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n",
				    sgl->generic.type, sgl->generic.subtype);
			rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
			return -1;
		}

		if (req->length == 0) {
			xfer = SPDK_NVME_DATA_NONE;
			req->data = NULL;
		}

		req->xfer = xfer;

		/*
		 * For host-to-controller transfers described by a keyed SGL, the
		 * data must be pulled into the target bounce buffer before the
		 * backend NVMe device can process the command.
		 */
		if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
			if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK) {
				SPDK_TRACELOG(SPDK_TRACE_NVMF, "Initiating Host to Controller data transfer\n");
				ret = nvmf_post_rdma_read(conn, req);
				if (ret) {
					SPDK_ERRLOG("Unable to post rdma read tx descriptor\n");
					rsp->status.sc = SPDK_NVME_SC_DATA_TRANSFER_ERROR;
					return -1;
				}

				/* Wait for transfer to complete before executing command. */
				return 1;
			}
		}
	}

	if (xfer == SPDK_NVME_DATA_NONE) {
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "No data to transfer\n");
		RTE_VERIFY(req->data == NULL);
		RTE_VERIFY(req->length == 0);
	} else {
		RTE_VERIFY(req->data != NULL);
		RTE_VERIFY(req->length != 0);

		SPDK_TRACELOG(SPDK_TRACE_NVMF, "%s data ready\n",
			      xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER ?
			      "Host to Controller" : "Controller to Host");
	}

	return 0;
}
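/*
 * Illustrative sketch (not part of the original source): one way a transport
 * could dispatch on the tri-state return value of spdk_nvmf_request_prep_data().
 * nvmf_execute_request() and nvmf_request_fail() are hypothetical helpers used
 * only to show the contract: 0 = data ready, execute now; 1 = an RDMA READ was
 * posted, wait for its completion; -1 = error, the response status is already set.
 */
static int
nvmf_handle_new_request(struct spdk_nvmf_request *req, void *in_cap_data,
			uint32_t in_cap_len, void *bb, uint32_t bb_len)
{
	int ret;

	ret = spdk_nvmf_request_prep_data(req, in_cap_data, in_cap_len, bb, bb_len);
	if (ret < 0) {
		/* prep_data filled in rsp->status.sc; send the error response (hypothetical helper) */
		return nvmf_request_fail(req);
	}
	if (ret == 1) {
		/* RDMA READ in flight; the CQ handler resumes this request later */
		return 0;
	}

	/* ret == 0: any required data is already in place (hypothetical helper) */
	return nvmf_execute_request(req);
}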
static int
nvmf_process_io_command(struct spdk_nvmf_conn *conn, struct nvme_qp_tx_desc *tx_desc)
{
	struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc;
	struct nvmf_request *req;
	struct spdk_nvme_sgl_descriptor *sgl;
	struct spdk_nvmf_keyed_sgl_descriptor *keyed_sgl;
	struct spdk_nvme_cmd *cmd;
	enum spdk_nvme_data_transfer xfer;
	void *buf = NULL;
	uint32_t len = 0;
	int ret;

	req = &tx_desc->req_state;
	cmd = &req->cmd->nvme_cmd;
	sgl = (struct spdk_nvme_sgl_descriptor *)&cmd->dptr.sgl1;
	keyed_sgl = (struct spdk_nvmf_keyed_sgl_descriptor *)sgl;

	xfer = spdk_nvme_opc_get_data_transfer(cmd->opc);
	if (xfer != SPDK_NVME_DATA_NONE) {
		/*
		 * NVMf supports in-capsule data for write commands. If the caller
		 * indicates an SGL, verify the SGL for in-capsule or RDMA
		 * read/write use and prepare the data buffer reference and
		 * length for the NVMf library.
		 */
		/* TBD: add code to handle I/O larger than default bb size */
		if (sgl->type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
		    (sgl->type_specific == SPDK_NVME_SGL_SUBTYPE_ADDRESS ||
		     sgl->type_specific == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) {
			if (keyed_sgl->key == 0) {
				SPDK_ERRLOG("Host did not specify SGL key!\n");
				goto command_fail;
			}

			if (keyed_sgl->length > rx_desc->bb_sgl.length) {
				SPDK_ERRLOG("SGL length 0x%x exceeds BB length 0x%x\n",
					    (uint32_t)keyed_sgl->length, rx_desc->bb_sgl.length);
				goto command_fail;
			}

			buf = (void *)rx_desc->bb;
			len = rx_desc->bb_sgl.length;
			req->remote_addr = keyed_sgl->address;
			req->rkey = keyed_sgl->key;
			req->length = keyed_sgl->length;
		} else if (sgl->type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
			   sgl->type_specific == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
			uint64_t offset = sgl->address;
			uint32_t max_len = rx_desc->bb_sgl.length;

			if (offset > max_len) {
				SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
					    offset, max_len);
				goto command_fail;
			}
			max_len -= (uint32_t)offset;

			if (sgl->length > max_len) {
				SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
					    sgl->length, max_len);
				goto command_fail;
			}

			buf = rx_desc->bb + offset;
			len = sgl->length;
		} else {
			SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type %2x, Subtype %2x\n",
				    sgl->type, sgl->type_specific);
			goto command_fail;
		}

		/*
		 * For any I/O that requires RDMA data to be pulled into the target
		 * bounce buffer before processing by the backend NVMe device.
		 */
		if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
			if (len > 0 && sgl->type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK) {
				SPDK_TRACELOG(SPDK_TRACE_RDMA, " Issuing RDMA Read to get host data\n");
				/* data to be copied from remote host via memory RDMA */
				if (req->length < rx_desc->bb_len) {
					/* temporarily adjust the SGE to copy only what the host is prepared to send */
					SPDK_TRACELOG(SPDK_TRACE_DEBUG, " *** modify bb sgl length from %x to %x\n",
						      rx_desc->bb_sgl.length, req->length);
					rx_desc->bb_sgl.length = req->length;
				}

				req->pending = NVMF_PENDING_WRITE;
				ret = nvmf_post_rdma_read(tx_desc->conn, tx_desc);
				if (ret) {
					SPDK_ERRLOG("Unable to post rdma read tx descriptor\n");
					goto command_fail;
				}
				/* The RDMA completion handler will continue this I/O operation. */
				return 0;
			}
		}
	}

	/* send to NVMf library for backend NVMe processing */
	ret = nvmf_process_io_cmd(req->session, cmd, buf, len, req);
	if (ret) {
		/* The library failed the request and should have updated the response. */
		SPDK_TRACELOG(SPDK_TRACE_RDMA, "send nvme io cmd capsule error response\n");
		ret = spdk_nvmf_send_response(conn, req);
		if (ret) {
			SPDK_ERRLOG("Unable to send io qp tx descriptor\n");
			goto command_fail;
		}
	}

	return 0;

command_fail:
	return -1;
}
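/*
 * Illustrative sketch (not part of the original source): the approximate shape
 * of the continuation that runs after the RDMA READ posted above completes.
 * The body is an assumption inferred from the surrounding code, not the real
 * nvmf_io_cmd_continue(): once the READ finishes, the bounce buffer holds the
 * host's write data, so the request can be handed to the NVMf library exactly
 * as the non-deferred path does. A real implementation would likely also
 * restore the temporarily shortened bb_sgl.length.
 */
static int
nvmf_io_cmd_continue_sketch(struct spdk_nvmf_conn *conn, struct nvme_qp_tx_desc *tx_desc)
{
	struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc;
	struct nvmf_request *req = &tx_desc->req_state;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;

	/* The host's data was pulled into the bounce buffer by the RDMA READ. */
	return nvmf_process_io_cmd(req->session, cmd, (void *)rx_desc->bb,
				   req->length, req);
}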
static int
nvmf_process_connect(struct spdk_nvmf_conn *conn, struct nvme_qp_tx_desc *tx_desc)
{
	struct spdk_nvmf_fabric_connect_cmd *connect;
	struct nvmf_request *req;
	struct nvme_qp_rx_desc *rx_desc = tx_desc->rx_desc;
	union sgl_shift *sgl;
	int ret;

	connect = (struct spdk_nvmf_fabric_connect_cmd *)&rx_desc->msg_buf;
	sgl = (union sgl_shift *)&connect->sgl1;

	/* debug - display the connect capsule */
	SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** Connect Capsule *** %p\n", connect);
	SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** cid = %x ***\n", connect->cid);
	SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** recfmt = %x ***\n", connect->recfmt);
	SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** qid = %x ***\n", connect->qid);
	SPDK_TRACELOG(SPDK_TRACE_NVMF, " *** sqsize = %x ***\n", connect->sqsize);

	if (sgl->nvmf_sgl.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
	    sgl->nvmf_sgl.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
		/*
		 * The initiator passed the extended data to the target as
		 * in-capsule data rather than via an RDMA SGL transfer, so the
		 * extended data resides in the rx message buffer.
		 */
		SPDK_TRACELOG(SPDK_TRACE_NVMF, " Using In-Capsule connect data\n");
		if (rx_desc->recv_bc < (sizeof(struct spdk_nvmf_fabric_connect_cmd) +
					sizeof(struct spdk_nvmf_fabric_connect_data))) {
			SPDK_ERRLOG("insufficient in-capsule data to satisfy connect!\n");
			goto connect_fail;
		}
		nvmf_connect_continue(conn, tx_desc);
	} else if (sgl->nvmf_sgl.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
		   (sgl->nvmf_sgl.subtype == SPDK_NVME_SGL_SUBTYPE_ADDRESS ||
		    sgl->nvmf_sgl.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) {
		/* set up a new SQE that uses the local bounce buffer */
		req = &tx_desc->req_state;
		req->remote_addr = sgl->nvmf_sgl.address;
		req->rkey = sgl->nvmf_sgl.key;
		req->pending = NVMF_PENDING_CONNECT;

		SPDK_TRACELOG(SPDK_TRACE_RDMA, " Issuing RDMA Read to get host connect data\n");
		/* data to be copied from host via memory RDMA */
		if (sgl->nvmf_sgl.length < rx_desc->bb_len) {
			/* temporarily adjust the SGE to copy only what the host is prepared to send */
			SPDK_TRACELOG(SPDK_TRACE_DEBUG, " *** modify bb sgl length from %x to %x\n",
				      rx_desc->bb_sgl.length, sgl->nvmf_sgl.length);
			rx_desc->bb_sgl.length = sgl->nvmf_sgl.length;
		}

		ret = nvmf_post_rdma_read(tx_desc->conn, tx_desc);
		if (ret) {
			SPDK_ERRLOG("Unable to post rdma read tx descriptor\n");
			goto connect_fail;
		}
		/* The RDMA completion handler will continue the connect operation. */
	} else {
		SPDK_ERRLOG("Invalid NVMf Connect SGL: Type %2x, Subtype %2x\n",
			    sgl->nvmf_sgl.type, sgl->nvmf_sgl.subtype);
		goto connect_fail;
	}

	return 0;

connect_fail:
	return -1;
}
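/*
 * Illustrative sketch (not part of the original source): for the in-capsule
 * path above, and assuming the usual zero SGL offset, the connect data
 * immediately follows the connect command in the received message buffer.
 * That layout is why recv_bc must cover both structures before
 * nvmf_connect_continue() can safely dereference the data. The helper name
 * is hypothetical.
 */
static struct spdk_nvmf_fabric_connect_data *
nvmf_connect_data_from_capsule(struct nvme_qp_rx_desc *rx_desc)
{
	uint8_t *capsule = (uint8_t *)&rx_desc->msg_buf;

	return (struct spdk_nvmf_fabric_connect_data *)
	       (capsule + sizeof(struct spdk_nvmf_fabric_connect_cmd));
}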
static int
nvmf_cq_event_handler(struct spdk_nvmf_conn *conn)
{
	struct ibv_wc wc;
	struct nvme_qp_tx_desc *tx_desc;
	struct nvmf_request *req;
	int rc;
	int cq_count = 0;
	int i;

	for (i = 0; i < conn->sq_depth; i++) {
		tx_desc = NULL;

		/* if an overflow condition was hit, stop all processing but do not disconnect */
		if (conn->state == CONN_STATE_OVERFLOW) {
			break;
		}

		rc = ibv_poll_cq(conn->cq, 1, &wc);
		if (rc == 0) {
			/* No completions at this time */
			break;
		}
		if (rc < 0) {
			SPDK_ERRLOG("Poll CQ error!(%d): %s\n", errno, strerror(errno));
			goto handler_error;
		}

		/* OK, process the single successful cq event */
		cq_count += rc;

		if (wc.status) {
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "CQ completion error status %d, exiting handler\n",
				      wc.status);
			break;
		}

		switch (wc.opcode) {
		case IBV_WC_SEND:
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ send completion\n");
			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
			nvmf_deactive_tx_desc(tx_desc);
			break;

		case IBV_WC_RDMA_WRITE:
			/*
			 * This event is generated only if the IBV_SEND_SIGNALED
			 * flag was set in rdma_write, to trace rdma write latency.
			 */
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ rdma write completion\n");
			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
			spdk_trace_record(TRACE_RDMA_WRITE_COMPLETE, 0, 0, (uint64_t)tx_desc->rx_desc, 0);
			break;

		case IBV_WC_RDMA_READ:
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ rdma read completion\n");
			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
			spdk_trace_record(TRACE_RDMA_READ_COMPLETE, 0, 0, (uint64_t)tx_desc->rx_desc, 0);

			req = &tx_desc->req_state;
			if (req->pending == NVMF_PENDING_WRITE) {
				req->pending = NVMF_PENDING_NONE;
				rc = nvmf_io_cmd_continue(conn, tx_desc);
				if (rc) {
					SPDK_ERRLOG("error from io cmd continue\n");
					goto handler_error;
				}

				/* Check for any pending rdma_reads to start */
				conn->pending_rdma_read_count--;
				if (!STAILQ_EMPTY(&conn->qp_pending_desc)) {
					tx_desc = STAILQ_FIRST(&conn->qp_pending_desc);
					STAILQ_REMOVE_HEAD(&conn->qp_pending_desc, link);
					STAILQ_INSERT_TAIL(&conn->qp_tx_active_desc, tx_desc, link);

					SPDK_TRACELOG(SPDK_TRACE_RDMA, "Issue rdma read from pending queue: tx_desc %p\n",
						      tx_desc);

					rc = nvmf_post_rdma_read(conn, tx_desc);
					if (rc) {
						SPDK_ERRLOG("Unable to post pending rdma read descriptor\n");
						goto handler_error;
					}
				}
			} else if (req->pending == NVMF_PENDING_CONNECT) {
				req->pending = NVMF_PENDING_NONE;
				nvmf_connect_continue(conn, tx_desc);
			}
			break;

		case IBV_WC_RECV:
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ recv completion\n");
			spdk_trace_record(TRACE_NVMF_IO_START, 0, 0, wc.wr_id, 0);
			rc = nvmf_recv(conn, &wc);
			if (rc) {
				SPDK_ERRLOG("nvmf_recv processing failure\n");
				goto handler_error;
			}
			break;

		default:
			SPDK_ERRLOG("Unknown poll cq completion opcode %d\n", wc.opcode);
			goto handler_error;
		}
	}

	return cq_count;

handler_error:
	if (tx_desc != NULL) {
		nvmf_deactive_tx_desc(tx_desc);
	}
	SPDK_ERRLOG("handler error, exiting!\n");
	return -1;
}
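/*
 * Illustrative sketch (not part of the original source): a minimal poll loop
 * built around nvmf_cq_event_handler(). The conn_is_active() and
 * nvmf_disconnect_conn() helpers are hypothetical; the handler's contract is
 * taken from the code above: it returns the number of completions drained
 * (at most sq_depth per call) or -1 on a fatal error, in which case the
 * connection should be torn down.
 */
static void
nvmf_poll_conn(struct spdk_nvmf_conn *conn)
{
	int count;

	while (conn_is_active(conn)) {
		count = nvmf_cq_event_handler(conn);
		if (count < 0) {
			/* Fatal CQ error; tear the connection down (hypothetical helper) */
			nvmf_disconnect_conn(conn);
			return;
		}
		/* count == 0 simply means the CQ was empty on this pass */
	}
}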