static int fio_syslet_queue(struct thread_data *td, struct io_u *io_u)
{
	struct syslet_data *sd = td->io_ops->data;
	union indirect_params params;
	struct indirect_registers regs;
	int ret;

	fio_ro_check(td, io_u);

	memset(&params, 0, sizeof(params));
	fill_syslet_args(&params.syslet, sd->ring, (long)io_u, ret_func, sd->stack);

	fio_syslet_prep(io_u, &regs);

	ret = syscall(__NR_indirect, &regs, &params, sizeof(params), 0);
	if (ret == (int) io_u->xfer_buflen) {
		/*
		 * completed sync, account. this also catches fsync().
		 */
		return FIO_Q_COMPLETED;
	} else if (ret < 0) {
		/*
		 * queued for async execution
		 */
		if (errno == ESYSLETPENDING)
			return FIO_Q_QUEUED;
	}

	io_u->error = errno;
	td_verror(td, io_u->error, "xfer");
	return FIO_Q_COMPLETED;
}
/*
 * The ->queue() hook is responsible for initiating io on the io_u
 * being passed in. If the io engine is a synchronous one, io may complete
 * before ->queue() returns. Required.
 *
 * The io engine must transfer in the direction noted by io_u->ddir
 * to the buffer pointed to by io_u->xfer_buf for as many bytes as
 * io_u->xfer_buflen. Residual data count may be set in io_u->resid
 * for a short read/write.
 */
static int fio_unvme_queue(struct thread_data *td, struct io_u *io_u)
{
	/*
	 * Double sanity check to catch errant write on a readonly setup
	 */
	fio_ro_check(td, io_u);

	int ret = 1;
	unvme_page_t *page = io_u->engine_data;

	page->slba = io_u->offset / unvme.ns->blocksize;
	page->nlb = io_u->xfer_buflen / unvme.ns->blocksize;

	switch (io_u->ddir) {
	case DDIR_READ:
		TDEBUG("READ page=%d lba=%#lx", page->id, page->slba);
		ret = unvme_aread(unvme.ns, page);
		break;

	case DDIR_WRITE:
		TDEBUG("WRITE page=%d lba=%#lx", page->id, page->slba);
		ret = unvme_awrite(unvme.ns, page);
		break;

	default:
		break;
	}

	/*
	 * Could return FIO_Q_QUEUED for a queued request,
	 * FIO_Q_COMPLETED for a completed request, and FIO_Q_BUSY
	 * if we could queue no more at this point (you'd have to
	 * define ->commit() to handle that).
	 */
	return ret ? FIO_Q_COMPLETED : FIO_Q_QUEUED;
}
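/*
 * Illustrative sketch only (an assumption, not part of the unvme engine
 * above): as the comment above notes, an engine whose ->queue() can return
 * FIO_Q_BUSY must also provide a ->commit() hook, which fio calls to push
 * out everything the engine has buffered so far. The engine-private data
 * and the example_submit() helper below are hypothetical; io_u_mark_submit()
 * is fio's accounting helper (also used by the io_uring engine further down).
 */
static int fio_example_commit(struct thread_data *td)
{
	struct example_data *ed = td->io_ops_data;	/* hypothetical per-thread engine data */
	int i;

	/* submit every io_u that ->queue() buffered but did not hand to the device */
	for (i = 0; i < ed->queued_nr; i++)
		example_submit(ed->io_us[i]);		/* hypothetical async submit call */

	/* tell fio how many requests were actually submitted */
	io_u_mark_submit(td, ed->queued_nr);
	ed->queued_nr = 0;
	return 0;
}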
static int fio_pmemblk_queue(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	fio_pmemblk_file_t pmb = FIOFILEPMBGET(f);
	unsigned long long off;
	unsigned long len;
	void *buf;
	int (*blkop) (PMEMblkpool *, void *, off_t) = (void *)pmemblk_write;

	fio_ro_check(td, io_u);

	switch (io_u->ddir) {
	case DDIR_READ:
		blkop = pmemblk_read;
		/* fall through */
	case DDIR_WRITE:
		off = io_u->offset;
		len = io_u->xfer_buflen;

		io_u->error = EINVAL;
		if (off % pmb->pmb_bsize)
			break;
		if (len % pmb->pmb_bsize)
			break;
		if ((off + len) / pmb->pmb_bsize > pmb->pmb_nblocks)
			break;

		io_u->error = 0;

		buf = io_u->xfer_buf;
		off /= pmb->pmb_bsize;
		len /= pmb->pmb_bsize;
		while (0 < len) {
			if (0 != blkop(pmb->pmb_pool, buf, off)) {
				io_u->error = errno;
				break;
			}
			buf += pmb->pmb_bsize;
			off++;
			len--;
		}
		off *= pmb->pmb_bsize;
		len *= pmb->pmb_bsize;
		io_u->resid = io_u->xfer_buflen - (off - io_u->offset);
		break;

	case DDIR_SYNC:
	case DDIR_DATASYNC:
	case DDIR_SYNC_FILE_RANGE:
		/* we're always sync'd */
		io_u->error = 0;
		break;

	default:
		io_u->error = EINVAL;
		break;
	}

	return FIO_Q_COMPLETED;
}
static int fio_guasi_queue(struct thread_data *td, struct io_u *io_u)
{
	struct guasi_data *ld = td->io_ops->data;

	fio_ro_check(td, io_u);

	GDBG_PRINT(("fio_guasi_queue(%p)\n", io_u));
	if (ld->queued_nr == (int) td->o.iodepth)
		return FIO_Q_BUSY;

	ld->io_us[ld->queued_nr] = io_u;
	ld->queued_nr++;

	return FIO_Q_QUEUED;
}
static int fio_null_queue(struct thread_data *td, struct io_u *io_u)
{
	struct null_data *nd = (struct null_data *) td->io_ops_data;

	fio_ro_check(td, io_u);

	if (td->io_ops->flags & FIO_SYNCIO)
		return FIO_Q_COMPLETED;
	if (nd->events)
		return FIO_Q_BUSY;

	nd->io_us[nd->queued++] = io_u;
	return FIO_Q_QUEUED;
}
static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	int ret;

	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_READ)
		ret = read(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
	else if (io_u->ddir == DDIR_WRITE)
		ret = write(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
	else
		ret = fsync(f->fd);

	return fio_io_end(td, io_u, ret);
}
static int fio_rdmaio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct rdmaio_data *rd = td->io_ops->data;

	fio_ro_check(td, io_u);

	if (rd->io_u_queued_nr == (int)td->o.iodepth)
		return FIO_Q_BUSY;

	rd->io_us_queued[rd->io_u_queued_nr] = io_u;
	rd->io_u_queued_nr++;

	dprint_io_u(io_u, "fio_rdmaio_queue");

	return FIO_Q_QUEUED;
}
static int fio_solarisaio_queue(struct thread_data fio_unused *td,
			      struct io_u *io_u)
{
	struct solarisaio_data *sd = td->io_ops->data;
	struct fio_file *f = io_u->file;
	off_t off;
	int ret;

	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_SYNC) {
		if (sd->nr)
			return FIO_Q_BUSY;
		if (fsync(f->fd) < 0)
			io_u->error = errno;

		return FIO_Q_COMPLETED;
	}

	if (io_u->ddir == DDIR_DATASYNC) {
		if (sd->nr)
			return FIO_Q_BUSY;
		if (fdatasync(f->fd) < 0)
			io_u->error = errno;

		return FIO_Q_COMPLETED;
	}

	if (sd->nr == sd->max_depth)
		return FIO_Q_BUSY;

	off = io_u->offset;
	if (io_u->ddir == DDIR_READ)
		ret = aioread(f->fd, io_u->xfer_buf, io_u->xfer_buflen, off,
				SEEK_SET, &io_u->resultp);
	else
		ret = aiowrite(f->fd, io_u->xfer_buf, io_u->xfer_buflen, off,
				SEEK_SET, &io_u->resultp);

	if (ret) {
		io_u->error = errno;
		td_verror(td, io_u->error, "xfer");
		return FIO_Q_COMPLETED;
	}

	sd->nr++;
	return FIO_Q_QUEUED;
}
static int fio_psyncio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	int ret;

	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_READ)
		ret = pread(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
	else if (io_u->ddir == DDIR_WRITE)
		ret = pwrite(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
	else if (io_u->ddir == DDIR_TRIM) {
		do_io_u_trim(td, io_u);
		return FIO_Q_COMPLETED;
	} else
		ret = do_io_u_sync(td, io_u);

	return fio_io_end(td, io_u, ret);
}
static enum fio_q_status fio_mmapio_queue(struct thread_data *td,
					  struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	struct fio_mmap_data *fmd = FILE_ENG_DATA(f);

	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_READ)
		memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
	else if (io_u->ddir == DDIR_WRITE)
		memcpy(io_u->mmap_data, io_u->xfer_buf, io_u->xfer_buflen);
	else if (ddir_sync(io_u->ddir)) {
		if (msync(fmd->mmap_ptr, fmd->mmap_sz, MS_SYNC)) {
			io_u->error = errno;
			td_verror(td, io_u->error, "msync");
		}
	} else if (io_u->ddir == DDIR_TRIM) {
		int ret = do_io_u_trim(td, io_u);

		if (!ret)
			td_verror(td, io_u->error, "trim");
	}

	/*
	 * not really direct, but should drop the pages from the cache
	 */
	if (td->o.odirect && ddir_rw(io_u->ddir)) {
		if (msync(io_u->mmap_data, io_u->xfer_buflen, MS_SYNC) < 0) {
			io_u->error = errno;
			td_verror(td, io_u->error, "msync");
		}
		if (posix_madvise(io_u->mmap_data, io_u->xfer_buflen, POSIX_MADV_DONTNEED) < 0) {
			io_u->error = errno;
			td_verror(td, io_u->error, "madvise");
		}
	}

	return FIO_Q_COMPLETED;
}
static int fio_gf_queue(struct thread_data *td, struct io_u *io_u)
{
	struct gf_data *g = td->io_ops_data;
	int ret = 0;

	dprint(FD_FILE, "fio queue len %lu\n", io_u->xfer_buflen);
	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_READ)
		ret = glfs_read(g->fd, io_u->xfer_buf, io_u->xfer_buflen, 0);
	else if (io_u->ddir == DDIR_WRITE)
		ret = glfs_write(g->fd, io_u->xfer_buf, io_u->xfer_buflen, 0);
	else if (io_u->ddir == DDIR_SYNC)
		ret = glfs_fsync(g->fd);
	else if (io_u->ddir == DDIR_DATASYNC)
		ret = glfs_fdatasync(g->fd);
	else {
		log_err("unsupported operation.\n");
		return -EINVAL;
	}
	dprint(FD_FILE, "fio len %lu ret %d\n", io_u->xfer_buflen, ret);

	if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir))
		LAST_POS(io_u->file) = io_u->offset + ret;

	if (ret != (int)io_u->xfer_buflen) {
		if (ret >= 0) {
			io_u->resid = io_u->xfer_buflen - ret;
			io_u->error = 0;
			return FIO_Q_COMPLETED;
		} else
			io_u->error = errno;
	}

	if (io_u->error) {
		log_err("IO failed.\n");
		td_verror(td, io_u->error, "xfer");
	}

	return FIO_Q_COMPLETED;
}
static int fio_vsyncio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct syncio_data *sd = td->io_ops->data;

	fio_ro_check(td, io_u);

	if (!fio_vsyncio_append(td, io_u)) {
		dprint(FD_IO, "vsyncio_queue: no append (%d)\n", sd->queued);
		/*
		 * If we can't append and have stuff queued, tell fio to
		 * commit those first and then retry this io
		 */
		if (sd->queued)
			return FIO_Q_BUSY;
		if (io_u->ddir == DDIR_SYNC) {
			int ret = fsync(io_u->file->fd);

			return fio_io_end(td, io_u, ret);
		} else if (io_u->ddir == DDIR_DATASYNC) {
			int ret = fdatasync(io_u->file->fd);

			return fio_io_end(td, io_u, ret);
		}

		sd->queued = 0;
		sd->queued_bytes = 0;
		fio_vsyncio_set_iov(sd, io_u, 0);
	} else {
		if (sd->queued == td->o.iodepth) {
			dprint(FD_IO, "vsyncio_queue: max depth %d\n", sd->queued);
			return FIO_Q_BUSY;
		}

		dprint(FD_IO, "vsyncio_queue: append\n");
		fio_vsyncio_set_iov(sd, io_u, sd->queued);
	}

	dprint(FD_IO, "vsyncio_queue: depth now %d\n", sd->queued);
	return FIO_Q_QUEUED;
}
static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	unsigned long long real_off = io_u->offset - f->file_offset;

	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_READ)
		memcpy(io_u->xfer_buf, f->mmap + real_off, io_u->xfer_buflen);
	else if (io_u->ddir == DDIR_WRITE)
		memcpy(f->mmap + real_off, io_u->xfer_buf, io_u->xfer_buflen);
	else if (io_u->ddir == DDIR_SYNC) {
		size_t len = (f->io_size + page_size - 1) & ~page_mask;

		if (msync(f->mmap, len, MS_SYNC)) {
			io_u->error = errno;
			td_verror(td, io_u->error, "msync");
		}
	}

	/*
	 * not really direct, but should drop the pages from the cache
	 */
	if (td->o.odirect && io_u->ddir != DDIR_SYNC) {
		size_t len = (io_u->xfer_buflen + page_size - 1) & ~page_mask;
		unsigned long long off = real_off & ~page_mask;

		if (msync(f->mmap + off, len, MS_SYNC) < 0) {
			io_u->error = errno;
			td_verror(td, io_u->error, "msync");
		}
		if (madvise(f->mmap + off, len, MADV_DONTNEED) < 0) {
			io_u->error = errno;
			td_verror(td, io_u->error, "madvise");
		}
	}

	return FIO_Q_COMPLETED;
}
static enum fio_q_status fio_libpmem_queue(struct thread_data *td,
					   struct io_u *io_u)
{
	fio_ro_check(td, io_u);
	io_u->error = 0;

	dprint(FD_IO, "DEBUG fio_libpmem_queue\n");

	switch (io_u->ddir) {
	case DDIR_READ:
		memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
		break;
	case DDIR_WRITE:
		dprint(FD_IO, "DEBUG mmap_data=%p, xfer_buf=%p\n",
				io_u->mmap_data, io_u->xfer_buf);
		dprint(FD_IO, "td->o.odirect %d\n", td->o.odirect);
		if (td->o.odirect) {
			pmem_memcpy_persist(io_u->mmap_data,
					io_u->xfer_buf, io_u->xfer_buflen);
		} else {
			pmem_memcpy_nodrain(io_u->mmap_data,
					io_u->xfer_buf, io_u->xfer_buflen);
		}
		break;
	case DDIR_SYNC:
	case DDIR_DATASYNC:
	case DDIR_SYNC_FILE_RANGE:
		break;
	default:
		io_u->error = EINVAL;
		break;
	}

	return FIO_Q_COMPLETED;
}
static enum fio_q_status fio_ioring_queue(struct thread_data *td,
					  struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct io_sq_ring *ring = &ld->sq_ring;
	unsigned tail, next_tail;

	fio_ro_check(td, io_u);

	if (ld->queued == ld->iodepth)
		return FIO_Q_BUSY;

	if (io_u->ddir == DDIR_TRIM) {
		if (ld->queued)
			return FIO_Q_BUSY;

		do_io_u_trim(td, io_u);
		io_u_mark_submit(td, 1);
		io_u_mark_complete(td, 1);
		return FIO_Q_COMPLETED;
	}

	tail = *ring->tail;
	next_tail = tail + 1;
	read_barrier();
	if (next_tail == *ring->head)
		return FIO_Q_BUSY;

	/* ensure sqe stores are ordered with tail update */
	write_barrier();
	ring->array[tail & ld->sq_ring_mask] = io_u->index;
	*ring->tail = next_tail;
	write_barrier();

	ld->queued++;
	return FIO_Q_QUEUED;
}
static int fio_pvsyncio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct syncio_data *sd = td->io_ops->data;
	struct iovec *iov = &sd->iovecs[0];
	struct fio_file *f = io_u->file;
	int ret;

	fio_ro_check(td, io_u);

	iov->iov_base = io_u->xfer_buf;
	iov->iov_len = io_u->xfer_buflen;

	if (io_u->ddir == DDIR_READ)
		ret = preadv(f->fd, iov, 1, io_u->offset);
	else if (io_u->ddir == DDIR_WRITE)
		ret = pwritev(f->fd, iov, 1, io_u->offset);
	else if (io_u->ddir == DDIR_TRIM) {
		do_io_u_trim(td, io_u);
		return FIO_Q_COMPLETED;
	} else
		ret = do_io_u_sync(td, io_u);

	return fio_io_end(td, io_u, ret);
}
static int fio_mtd_queue(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	struct fio_mtd_data *fmd = FILE_ENG_DATA(f);
	int local_offs = 0;
	int ret;

	fio_ro_check(td, io_u);

	/*
	 * Errors tend to pertain to particular erase blocks, so divide up
	 * I/O to erase block size.
	 * If an error is encountered, log it and keep going onto the next
	 * block because the error probably just pertains to that block.
	 * TODO(dehrenberg): Divide up reads and writes into page-sized
	 * operations to get more fine-grained information about errors.
	 */
	while (local_offs < io_u->buflen) {
		int eb = (io_u->offset + local_offs) / fmd->info.eb_size;
		int eb_offs = (io_u->offset + local_offs) % fmd->info.eb_size;
		/* The length is the smaller of the length remaining in the
		 * buffer and the distance to the end of the erase block */
		int len = min((int)io_u->buflen - local_offs,
			      (int)fmd->info.eb_size - eb_offs);
		char *buf = ((char *)io_u->buf) + local_offs;

		if (td->o.skip_bad) {
			ret = fio_mtd_is_bad(td, fmd, io_u, eb);
			if (ret == -1)
				break;
			else if (ret == 1)
				goto next;
		}
		if (io_u->ddir == DDIR_READ) {
			ret = mtd_read(&fmd->info, f->fd, eb, eb_offs, buf, len);
			if (ret != 0) {
				io_u->error = errno;
				td_verror(td, errno, "mtd_read");
				if (fio_mtd_maybe_mark_bad(td, fmd, io_u, eb))
					break;
			}
		} else if (io_u->ddir == DDIR_WRITE) {
			ret = mtd_write(desc, &fmd->info, f->fd, eb,
					eb_offs, buf, len, NULL, 0, 0);
			if (ret != 0) {
				io_u->error = errno;
				td_verror(td, errno, "mtd_write");
				if (fio_mtd_maybe_mark_bad(td, fmd, io_u, eb))
					break;
			}
		} else if (io_u->ddir == DDIR_TRIM) {
			if (eb_offs != 0 || len != fmd->info.eb_size) {
				io_u->error = EINVAL;
				td_verror(td, EINVAL,
					  "trim on MTD must be erase block-aligned");
			}
			ret = mtd_erase(desc, &fmd->info, f->fd, eb);
			if (ret != 0) {
				io_u->error = errno;
				td_verror(td, errno, "mtd_erase");
				if (fio_mtd_maybe_mark_bad(td, fmd, io_u, eb))
					break;
			}
		} else {
			io_u->error = ENOTSUP;
			td_verror(td, io_u->error, "operation not supported on mtd");
		}

next:
		local_offs += len;
	}

	return FIO_Q_COMPLETED;
}
/*
 * For splice writing, we can vmsplice our data buffer directly into a
 * pipe and then splice that to a file.
 */
static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
{
	struct spliceio_data *sd = td->io_ops->data;
	struct iovec iov = {
		.iov_base = io_u->xfer_buf,
		.iov_len = io_u->xfer_buflen,
	};
	struct pollfd pfd = {
		.fd = sd->pipe[1],
		.events = POLLOUT,
	};
	struct fio_file *f = io_u->file;
	off_t off = io_u->offset;
	int ret, ret2;

	while (iov.iov_len) {
		if (poll(&pfd, 1, -1) < 0)
			return errno;

		ret = vmsplice(sd->pipe[1], &iov, 1, SPLICE_F_NONBLOCK);
		if (ret < 0)
			return -errno;

		iov.iov_len -= ret;
		iov.iov_base += ret;

		while (ret) {
			ret2 = splice(sd->pipe[0], NULL, f->fd, &off, ret, 0);
			if (ret2 < 0)
				return -errno;

			ret -= ret2;
		}
	}

	return io_u->xfer_buflen;
}

static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct spliceio_data *sd = td->io_ops->data;
	int uninitialized_var(ret);

	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_READ) {
		if (sd->vmsplice_to_user) {
			ret = fio_splice_read(td, io_u);
			/*
			 * This kernel doesn't support vmsplice to user
			 * space. Reset the vmsplice_to_user flag, so that
			 * we retry below and don't hit this path again.
			 */
			if (ret == -EBADF)
				sd->vmsplice_to_user = 0;
		}
		if (!sd->vmsplice_to_user)
			ret = fio_splice_read_old(td, io_u);
	} else if (io_u->ddir == DDIR_WRITE)
		ret = fio_splice_write(td, io_u);
	else
		ret = fsync(io_u->file->fd);

	if (ret != (int) io_u->xfer_buflen) {
		if (ret >= 0) {
			io_u->resid = io_u->xfer_buflen - ret;
			io_u->error = 0;
			return FIO_Q_COMPLETED;
		} else
			io_u->error = errno;
	}

	if (io_u->error) {
		td_verror(td, io_u->error, "xfer");
		if (io_u->error == EINVAL)
			log_err("fio: looks like splice doesn't work on this"
					" file system\n");
	}

	return FIO_Q_COMPLETED;
}

static void fio_spliceio_cleanup(struct thread_data *td)
{
	struct spliceio_data *sd = td->io_ops->data;

	if (sd) {
		close(sd->pipe[0]);
		close(sd->pipe[1]);
		free(sd);
	}
}

static int fio_spliceio_init(struct thread_data *td)
{
	struct spliceio_data *sd = malloc(sizeof(*sd));

	if (pipe(sd->pipe) < 0) {
		td_verror(td, errno, "pipe");
		free(sd);
		return 1;
	}

	/*
	 * Assume this works, we'll reset this if it doesn't
	 */
	sd->vmsplice_to_user = 1;

	/*
	 * Works with "real" vmsplice to user, eg mapping pages directly.
	 * Reset if we fail.
	 */
	sd->vmsplice_to_user_map = 1;

	/*
	 * And if vmsplice_to_user works, we definitely need aligned
	 * buffers. Just set ->odirect to force that.
	 */
	if (td_read(td))
		td->o.odirect = 1;

	td->io_ops->data = sd;
	return 0;
}

static struct ioengine_ops ioengine = {
	.name		= "splice",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_spliceio_init,
	.queue		= fio_spliceio_queue,
	.cleanup	= fio_spliceio_cleanup,
	.open_file	= generic_open_file,
	.close_file	= generic_close_file,
	.get_file_size	= generic_get_file_size,
	.flags		= FIO_SYNCIO | FIO_PIPEIO,
};

#else /* FIO_HAVE_SPLICE */

/*
 * When we have a proper configure system in place, we simply won't build
 * and install this io engine. For now install a crippled version that
 * just complains and fails to load.
 */
static int fio_spliceio_init(struct thread_data fio_unused *td)
{
	fprintf(stderr, "fio: splice not available\n");
	return 1;
}

static struct ioengine_ops ioengine = {
	.name		= "splice",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_spliceio_init,
};

#endif

static void fio_init fio_spliceio_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_spliceio_unregister(void)
{
	unregister_ioengine(&ioengine);
}