/*
 * Transfer data for a single PRP entry, at page granularity, between guest
 * memory at @mem_addr and the namespace backing mapping @mapping_addr.
 *
 * @n:              controller state (unused here; kept for call-site symmetry)
 * @mem_addr:       guest physical address from the PRP entry
 * @data_size_p:    in/out - remaining bytes of the transfer; decremented by
 *                  the amount moved
 * @file_offset_p:  in/out - byte offset into the backing mapping; advanced by
 *                  the amount moved
 * @mapping_addr:   host base address of the mapped namespace data
 * @rw:             NVME_CMD_READ or NVME_CMD_WRITE
 *
 * Returns NVME_SC_SUCCESS, or FAIL when nothing remains to transfer or the
 * opcode is not a read/write.
 *
 * Fix: log format specifiers used %ld for uint64_t values, which is undefined
 * behavior where long is 32 bits; values are now cast to unsigned long long
 * and printed with %llu.
 */
static uint8_t do_rw_prp(NVMEState *n, uint64_t mem_addr,
    uint64_t *data_size_p, uint64_t *file_offset_p, uint8_t *mapping_addr,
    uint8_t rw)
{
    uint64_t data_len;

    if (*data_size_p == 0) {
        return FAIL;
    }

    /* Data Len to be written per page basis: never cross the page boundary
     * containing mem_addr, and never exceed what is left to transfer. */
    data_len = PAGE_SIZE - (mem_addr % PAGE_SIZE);
    if (data_len > *data_size_p) {
        data_len = *data_size_p;
    }

    LOG_DBG("File offset for read/write:%llu",
        (unsigned long long)*file_offset_p);
    LOG_DBG("Length for read/write:%llu", (unsigned long long)data_len);
    LOG_DBG("Address for read/write:%llu", (unsigned long long)mem_addr);

    switch (rw) {
    case NVME_CMD_READ:
        /* Device read: copy from the backing mapping into guest memory */
        LOG_DBG("Read cmd called");
        nvme_dma_mem_write(mem_addr, (mapping_addr + *file_offset_p), data_len);
        break;
    case NVME_CMD_WRITE:
        /* Device write: copy from guest memory into the backing mapping */
        LOG_DBG("Write cmd called");
        nvme_dma_mem_read(mem_addr, (mapping_addr + *file_offset_p), data_len);
        break;
    default:
        LOG_ERR("Error- wrong opcode: %d", rw);
        return FAIL;
    }

    /* Advance the cursor for the caller's next PRP entry */
    *file_offset_p = *file_offset_p + data_len;
    *data_size_p = *data_size_p - data_len;
    return NVME_SC_SUCCESS;
}
uint8_t nvme_io_command(NVMEState *n, NVMECmd *sqe, NVMECQE *cqe) { NVME_rw *e = (NVME_rw *)sqe; NVMEStatusField *sf = (NVMEStatusField *)&cqe->status; uint8_t res = FAIL; uint64_t data_size, file_offset; uint8_t *mapping_addr; uint32_t nvme_blk_sz; DiskInfo *disk; uint8_t lba_idx; sf->sc = NVME_SC_SUCCESS; LOG_DBG("%s(): called", __func__); /* As of NVMe spec rev 1.0b "All NVM cmds use the CMD.DW1 (NSID) field". * Thus all NVM cmd set cmds must check for illegal namespaces up front */ if (e->nsid == 0 || (e->nsid > n->idtfy_ctrl->nn)) { LOG_NORM("%s(): Invalid nsid:%u", __func__, e->nsid); sf->sc = NVME_SC_INVALID_NAMESPACE; return FAIL; } if (sqe->opcode == NVME_CMD_FLUSH) { return NVME_SC_SUCCESS; } else if (sqe->opcode == NVME_CMD_DSM) { return NVME_SC_SUCCESS; } else if ((sqe->opcode != NVME_CMD_READ) && (sqe->opcode != NVME_CMD_WRITE)) { LOG_NORM("%s():Wrong IO opcode:\t\t0x%02x", __func__, sqe->opcode); sf->sc = NVME_SC_INVALID_OPCODE; return FAIL; } disk = &n->disk[e->nsid - 1]; if ((e->slba + e->nlb) >= disk->idtfy_ns.nsze) { LOG_NORM("%s(): LBA out of range", __func__); sf->sc = NVME_SC_LBA_RANGE; return FAIL; } else if ((e->slba + e->nlb) >= disk->idtfy_ns.ncap) { LOG_NORM("%s():Capacity Exceeded", __func__); sf->sc = NVME_SC_CAP_EXCEEDED; return FAIL; } lba_idx = disk->idtfy_ns.flbas & 0xf; if ((e->mptr == 0) && /* if NOT supplying separate meta buffer */ (disk->idtfy_ns.lbafx[lba_idx].ms != 0) && /* if using metadata */ ((disk->idtfy_ns.flbas & 0x10) == 0)) { /* if using separate buffer */ LOG_ERR("%s(): invalid meta-data for extended lba", __func__); sf->sc = NVME_SC_INVALID_FIELD; return FAIL; } /* Read in the command */ nvme_blk_sz = NVME_BLOCK_SIZE(disk->idtfy_ns.lbafx[lba_idx].lbads); LOG_DBG("NVME Block size: %u", nvme_blk_sz); data_size = (e->nlb + 1) * nvme_blk_sz; if (disk->idtfy_ns.flbas & 0x10) { data_size += (disk->idtfy_ns.lbafx[lba_idx].ms * (e->nlb + 1)); } if (n->idtfy_ctrl->mdts && data_size > PAGE_SIZE * (1 << (n->idtfy_ctrl->mdts))) 
{ LOG_ERR("%s(): data size:%ld exceeds max:%ld", __func__, data_size, ((uint64_t)PAGE_SIZE) * (1 << (n->idtfy_ctrl->mdts))); sf->sc = NVME_SC_INVALID_FIELD; return FAIL; } file_offset = e->slba * nvme_blk_sz; mapping_addr = disk->mapping_addr; /* Namespace not ready */ if (mapping_addr == NULL) { LOG_NORM("%s():Namespace not ready", __func__); sf->sc = NVME_SC_NS_NOT_READY; return FAIL; } /* Writing/Reading PRP1 */ res = do_rw_prp(n, e->prp1, &data_size, &file_offset, mapping_addr, e->opcode); if (res == FAIL) { return FAIL; } if (data_size > 0) { if (data_size <= PAGE_SIZE) { res = do_rw_prp(n, e->prp2, &data_size, &file_offset, mapping_addr, e->opcode); } else { res = do_rw_prp_list(n, sqe, &data_size, &file_offset, mapping_addr); } if (res == FAIL) { return FAIL; } } /* Spec states that non-zero meta data buffers shall be ignored, i.e. no * error reported, when the DW4&5 (MPTR) field is not in use */ if ((e->mptr != 0) && /* if supplying separate meta buffer */ (disk->idtfy_ns.lbafx[lba_idx].ms != 0) && /* if using metadata */ ((disk->idtfy_ns.flbas & 0x10) == 0)) { /* if using separate buffer */ /* Then go ahead and use the separate meta data buffer */ unsigned int ms, meta_offset, meta_size; uint8_t *meta_mapping_addr; ms = disk->idtfy_ns.lbafx[lba_idx].ms; meta_offset = e->slba * ms; meta_size = (e->nlb + 1) * ms; meta_mapping_addr = disk->meta_mapping_addr + meta_offset; if (e->opcode == NVME_CMD_READ) { nvme_dma_mem_write(e->mptr, meta_mapping_addr, meta_size); } else if (e->opcode == NVME_CMD_WRITE) { nvme_dma_mem_read(e->mptr, meta_mapping_addr, meta_size); } } nvme_update_stats(n, disk, e->opcode, e->slba, e->nlb); return res; }
/*
 * Fetch one entry from submission queue @sq_id, execute it, and post the
 * completion to the associated completion queue (raising MSI-X if enabled).
 *
 * Ordering matters here: the SQE is read before the abort check, the SQ head
 * is advanced before the command executes (so the posted CQE carries the
 * updated head), and the CQ tail is advanced only after the CQE DMA write.
 */
void process_sq(NVMEState *n, uint16_t sq_id)
{
    target_phys_addr_t addr;
    uint16_t cq_id;
    NVMECmd sqe;
    NVMECQE cqe;
    /* View of the CQE status dword as individual status fields */
    NVMEStatusField *sf = (NVMEStatusField *) &cqe.status;

    /* A zero dma_addr means the queue was never created (or was deleted) */
    if (n->sq[sq_id].dma_addr == 0 || n->cq[n->sq[sq_id].cq_id].dma_addr == 0) {
        LOG_ERR("Required Submission/Completion Queue does not exist");
        n->sq[sq_id].head = n->sq[sq_id].tail = 0;
        goto exit;
    }
    cq_id = n->sq[sq_id].cq_id;
    /* No room to post a completion: leave the SQE queued for a later pass */
    if (is_cq_full(n, cq_id)) {
        return;
    }
    memset(&cqe, 0, sizeof(cqe));

    LOG_DBG("%s(): called", __func__);

    /* Process SQE: locate the entry, contiguous queues by simple offset,
     * discontiguous queues via their PRP list */
    if (sq_id == ASQ_ID || n->sq[sq_id].phys_contig) {
        addr = n->sq[sq_id].dma_addr + n->sq[sq_id].head * sizeof(sqe);
    } else {
        /* PRP implementation */
        addr = find_discontig_queue_entry(n->page_size, n->sq[sq_id].head,
            sizeof(sqe), n->sq[sq_id].dma_addr);
    }
    nvme_dma_mem_read(addr, (uint8_t *)&sqe, sizeof(sqe));

    /* A pending Abort may consume this command before it executes; the head
     * still advances so the queue does not stall */
    if (n->abort) {
        if (abort_command(n, sq_id, &sqe)) {
            incr_sq_head(&n->sq[sq_id]);
            return;
        }
    }

    incr_sq_head(&n->sq[sq_id]);

    /* Admin queue entries go to the admin dispatcher, all others are IO */
    if (sq_id == ASQ_ID) {
        nvme_admin_command(n, &sqe, &cqe);
    } else {
        /* TODO add support for IO commands with different sizes of Q elements */
        nvme_io_command(n, &sqe, &cqe);
    }

    /* Filling up the CQ entry */
    cqe.sq_id = sq_id;
    cqe.sq_head = n->sq[sq_id].head;
    cqe.command_id = sqe.cid;

    /* Phase tag tells the host which CQEs are new on this wrap of the queue */
    sf->p = n->cq[cq_id].phase_tag;
    sf->m = 0;
    sf->dnr = 0; /* TODO add support for dnr */

    /* write cqe to completion queue */
    if (cq_id == ACQ_ID || n->cq[cq_id].phys_contig) {
        addr = n->cq[cq_id].dma_addr + n->cq[cq_id].tail * sizeof(cqe);
    } else {
        /* PRP implementation */
        addr = find_discontig_queue_entry(n->page_size, n->cq[cq_id].tail,
            sizeof(cqe), n->cq[cq_id].dma_addr);
    }
    nvme_dma_mem_write(addr, (uint8_t *)&cqe, sizeof(cqe));

    incr_cq_tail(&n->cq[cq_id]);

    if (cq_id == ACQ_ID) {
        /* 3.1.9 says: "This queue is always associated with interrupt vector 0" */
        msix_notify(&(n->dev), 0);
        return;
    }

    if (n->cq[cq_id].irq_enabled) {
        msix_notify(&(n->dev), n->cq[cq_id].vector);
    } else {
        LOG_NORM("kw q: IRQ not enabled for CQ: %d", cq_id);
    }
exit:
    return;
}