static int fill_io_u(struct thread_data *td, struct io_u *io_u)
{
	if (td->io_ops->flags & FIO_NOIO)
		goto out;

	set_rw_ddir(td, io_u);

	/*
	 * fsync() or fdatasync() or trim etc, we are done
	 */
	if (!ddir_rw(io_u->ddir))
		goto out;

	/*
	 * See if it's time to switch to a new zone
	 */
	if (td->zone_bytes >= td->o.zone_size && td->o.zone_skip) {
		td->zone_bytes = 0;
		io_u->file->file_offset += td->o.zone_range + td->o.zone_skip;
		io_u->file->last_pos = io_u->file->file_offset;
		td->io_skip_bytes += td->o.zone_skip;
	}

	/*
	 * No log, let the seq/rand engine retrieve the next buflen and
	 * position.
	 */
	if (get_next_offset(td, io_u)) {
		dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
		return 1;
	}

	io_u->buflen = get_next_buflen(td, io_u);
	if (!io_u->buflen) {
		dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
		return 1;
	}

	if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "io_u %p, offset too large\n", io_u);
		dprint(FD_IO, "  off=%llu/%lu > %llu\n",
			io_u->offset, io_u->buflen, io_u->file->real_file_size);
		return 1;
	}

	/*
	 * mark entry before potentially trimming io_u
	 */
	if (td_random(td) && file_randommap(td, io_u->file))
		mark_random_map(td, io_u);

	/*
	 * If using a write iolog, store this entry.
	 */
out:
	dprint_io_u(io_u, "fill_io_u");
	td->zone_bytes += io_u->buflen;
	log_io_u(td, io_u);
	return 0;
}

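/*
 * The zone-switch branch above advances the file offset by
 * zone_range + zone_skip once zone_size bytes have been done in the
 * current zone.  A minimal standalone sketch of that arithmetic, with
 * made-up option values (zone_size/zone_range/zone_skip) purely for
 * illustration; the variable names mirror fill_io_u() but nothing here
 * is fio code:
 */
#include <stdio.h>

int main(void)
{
	const unsigned long long zone_size  = 64ULL << 20;	/* 64 MiB done per zone */
	const unsigned long long zone_range = 64ULL << 20;	/* span covered by a zone */
	const unsigned long long zone_skip  = 192ULL << 20;	/* gap skipped after a zone */
	unsigned long long file_offset = 0, zone_bytes = 0, io_skip_bytes = 0;

	for (int zone = 0; zone < 3; zone++) {
		printf("zone %d starts at offset %llu MiB\n",
		       zone, file_offset >> 20);
		zone_bytes += zone_size;		/* pretend the zone has been filled */
		if (zone_bytes >= zone_size && zone_skip) {
			zone_bytes = 0;
			file_offset += zone_range + zone_skip;	/* same step as fill_io_u() */
			io_skip_bytes += zone_skip;
		}
	}
	/* With these values, successive zones begin at 0, 256 and 512 MiB. */
	return 0;
}
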
static int fio_rdmaio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct rdmaio_data *rd = td->io_ops->data;

	fio_ro_check(td, io_u);

	if (rd->io_u_queued_nr == (int)td->o.iodepth)
		return FIO_Q_BUSY;

	rd->io_us_queued[rd->io_u_queued_nr] = io_u;
	rd->io_u_queued_nr++;

	dprint_io_u(io_u, "fio_rdmaio_queue");

	return FIO_Q_QUEUED;
}

static struct io_u *fio_rdmaio_event(struct thread_data *td, int event)
{
	struct rdmaio_data *rd = td->io_ops->data;
	struct io_u *io_u;
	int i;

	io_u = rd->io_us_completed[0];
	for (i = 0; i < rd->io_u_completed_nr - 1; i++)
		rd->io_us_completed[i] = rd->io_us_completed[i + 1];

	rd->io_u_completed_nr--;

	dprint_io_u(io_u, "fio_rdmaio_event");

	return io_u;
}

static void io_completed(struct thread_data *td, struct io_u *io_u,
			 struct io_completion_data *icd)
{
	struct fio_file *f;

	dprint_io_u(io_u, "io complete");

	td_io_u_lock(td);
	assert(io_u->flags & IO_U_F_FLIGHT);
	io_u->flags &= ~(IO_U_F_FLIGHT | IO_U_F_BUSY_OK);

	/*
	 * Mark IO ok to verify
	 */
	if (io_u->ipo) {
		io_u->ipo->flags &= ~IP_F_IN_FLIGHT;
		write_barrier();
	}

	td_io_u_unlock(td);

	if (ddir_sync(io_u->ddir)) {
		td->last_was_sync = 1;
		f = io_u->file;
		if (f) {
			f->first_write = -1ULL;
			f->last_write = -1ULL;
		}
		return;
	}

	td->last_was_sync = 0;
	td->last_ddir = io_u->ddir;

	if (!io_u->error && ddir_rw(io_u->ddir)) {
		unsigned int bytes = io_u->buflen - io_u->resid;
		const enum fio_ddir idx = io_u->ddir;
		const enum fio_ddir odx = io_u->ddir ^ 1;
		int ret;

		td->io_blocks[idx]++;
		td->this_io_blocks[idx]++;
		td->io_bytes[idx] += bytes;

		if (!(io_u->flags & IO_U_F_VER_LIST))
			td->this_io_bytes[idx] += bytes;

		if (idx == DDIR_WRITE) {
			f = io_u->file;
			if (f) {
				if (f->first_write == -1ULL ||
				    io_u->offset < f->first_write)
					f->first_write = io_u->offset;
				if (f->last_write == -1ULL ||
				    ((io_u->offset + bytes) > f->last_write))
					f->last_write = io_u->offset + bytes;
			}
		}

		if (ramp_time_over(td) && (td->runstate == TD_RUNNING ||
					   td->runstate == TD_VERIFYING)) {
			account_io_completion(td, io_u, icd, idx, bytes);

			if (__should_check_rate(td, idx)) {
				td->rate_pending_usleep[idx] =
					(usec_for_io(td, idx) -
					 utime_since_now(&td->start));
			}
			if (idx != DDIR_TRIM && __should_check_rate(td, odx))
				td->rate_pending_usleep[odx] =
					(usec_for_io(td, odx) -
					 utime_since_now(&td->start));
		}

		icd->bytes_done[idx] += bytes;

		if (io_u->end_io) {
			ret = io_u->end_io(td, io_u);
			if (ret && !icd->error)
				icd->error = ret;
		}
	} else if (io_u->error) {
		icd->error = io_u->error;
		io_u_log_error(td, io_u);
	}
	if (icd->error) {
		enum error_type_bit eb = td_error_type(io_u->ddir, icd->error);

		if (!td_non_fatal_error(td, eb, icd->error))
			return;
		/*
		 * If there is a non_fatal error, then add to the error count
		 * and clear all the errors.
		 */
		update_error_count(td, icd->error);
		td_clear_error(td);
		icd->error = 0;
		io_u->error = 0;
	}
}

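/*
 * The __should_check_rate() branch above throttles by computing how far
 * ahead of the configured rate this data direction is: usec_for_io() is
 * taken to return the time the bytes done so far should have taken at the
 * target rate, and the elapsed runtime is subtracted from it.  A minimal
 * sketch of that idea only; bytes_done, rate_bps and elapsed_usec are
 * stand-in parameters for illustration, not fio symbols:
 */
static long pending_usleep(unsigned long long bytes_done,
			   unsigned long long rate_bps,
			   unsigned long long elapsed_usec)
{
	/* time the I/O should have taken at the target rate, in usec */
	unsigned long long should_have_taken =
		(bytes_done * 1000000ULL) / rate_bps;

	/*
	 * positive: we are ahead of the rate and should sleep;
	 * negative or zero: we are behind and owe no sleep
	 */
	return (long)(should_have_taken - elapsed_usec);
}
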
static int fio_rdmaio_send(struct thread_data *td, struct io_u **io_us,
			   unsigned int nr)
{
	struct rdmaio_data *rd = td->io_ops->data;
	struct ibv_send_wr *bad_wr;
#if 0
	enum ibv_wc_opcode comp_opcode;
	comp_opcode = IBV_WC_RDMA_WRITE;
#endif
	int i;
	long index;
	struct rdma_io_u_data *r_io_u_d;

	r_io_u_d = NULL;

	for (i = 0; i < nr; i++) {
		/* RDMA_WRITE or RDMA_READ */
		switch (rd->rdma_protocol) {
		case FIO_RDMA_MEM_WRITE:
			/* compose work request */
			r_io_u_d = io_us[i]->engine_data;
			index = __rand(&rd->rand_state) % rd->rmt_nr;
			r_io_u_d->sq_wr.opcode = IBV_WR_RDMA_WRITE;
			r_io_u_d->sq_wr.wr.rdma.rkey = rd->rmt_us[index].rkey;
			r_io_u_d->sq_wr.wr.rdma.remote_addr = \
					rd->rmt_us[index].buf;
			r_io_u_d->sq_wr.sg_list->length = io_us[i]->buflen;
			break;
		case FIO_RDMA_MEM_READ:
			/* compose work request */
			r_io_u_d = io_us[i]->engine_data;
			index = __rand(&rd->rand_state) % rd->rmt_nr;
			r_io_u_d->sq_wr.opcode = IBV_WR_RDMA_READ;
			r_io_u_d->sq_wr.wr.rdma.rkey = rd->rmt_us[index].rkey;
			r_io_u_d->sq_wr.wr.rdma.remote_addr = \
					rd->rmt_us[index].buf;
			r_io_u_d->sq_wr.sg_list->length = io_us[i]->buflen;
			break;
		case FIO_RDMA_CHA_SEND:
			r_io_u_d = io_us[i]->engine_data;
			r_io_u_d->sq_wr.opcode = IBV_WR_SEND;
			r_io_u_d->sq_wr.send_flags = IBV_SEND_SIGNALED;
			break;
		default:
			log_err("fio: unknown rdma protocol - %d\n",
				rd->rdma_protocol);
			break;
		}

		if (ibv_post_send(rd->qp, &r_io_u_d->sq_wr, &bad_wr) != 0) {
			log_err("fio: ibv_post_send fail\n");
			return -1;
		}

		dprint_io_u(io_us[i], "fio_rdmaio_send");
	}

	/*
	 * wait for completion
	 *	rdma_poll_wait(td, comp_opcode);
	 */

	return i;
}

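/*
 * Each arm of the switch above fills the io_u's pre-built ibv_send_wr
 * (opcode, rkey, remote address, scatter/gather length) and the request
 * is then posted with ibv_post_send().  A minimal libibverbs-only sketch
 * of composing and posting a single-segment RDMA WRITE, independent of
 * fio; the qp, mr, remote_addr and rkey parameters are assumed to come
 * from the usual connection setup and memory registration:
 */
#include <stdint.h>
#include <string.h>
#include <infiniband/verbs.h>

static int post_rdma_write(struct ibv_qp *qp, struct ibv_mr *mr,
			   void *local_buf, uint32_t len,
			   uint64_t remote_addr, uint32_t rkey)
{
	struct ibv_sge sge = {
		.addr	= (uintptr_t)local_buf,
		.length	= len,
		.lkey	= mr->lkey,
	};
	struct ibv_send_wr wr, *bad_wr = NULL;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id		= (uintptr_t)local_buf;
	wr.sg_list		= &sge;
	wr.num_sge		= 1;
	wr.opcode		= IBV_WR_RDMA_WRITE;
	wr.send_flags		= IBV_SEND_SIGNALED;	/* generate a completion */
	wr.wr.rdma.remote_addr	= remote_addr;
	wr.wr.rdma.rkey		= rkey;

	/* returns 0 on success; on failure bad_wr points at the failed WR */
	return ibv_post_send(qp, &wr, &bad_wr);
}
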
static void io_completed(struct thread_data *td, struct io_u *io_u,
			 struct io_completion_data *icd)
{
	struct fio_file *f;

	dprint_io_u(io_u, "io complete");

	td_io_u_lock(td);
	assert(io_u->flags & IO_U_F_FLIGHT);
	io_u->flags &= ~(IO_U_F_FLIGHT | IO_U_F_BUSY_OK);
	td_io_u_unlock(td);

	if (ddir_sync(io_u->ddir)) {
		td->last_was_sync = 1;
		f = io_u->file;
		if (f) {
			f->first_write = -1ULL;
			f->last_write = -1ULL;
		}
		return;
	}

	td->last_was_sync = 0;
	td->last_ddir = io_u->ddir;

	if (!io_u->error && ddir_rw(io_u->ddir)) {
		unsigned int bytes = io_u->buflen - io_u->resid;
		const enum fio_ddir idx = io_u->ddir;
		const enum fio_ddir odx = io_u->ddir ^ 1;
		int ret;

		td->io_blocks[idx]++;
		td->this_io_blocks[idx]++;
		td->io_bytes[idx] += bytes;

		if (!(io_u->flags & IO_U_F_VER_LIST))
			td->this_io_bytes[idx] += bytes;

		if (idx == DDIR_WRITE) {
			f = io_u->file;
			if (f) {
				if (f->first_write == -1ULL ||
				    io_u->offset < f->first_write)
					f->first_write = io_u->offset;
				if (f->last_write == -1ULL ||
				    ((io_u->offset + bytes) > f->last_write))
					f->last_write = io_u->offset + bytes;
			}
		}

		if (ramp_time_over(td) && (td->runstate == TD_RUNNING ||
					   td->runstate == TD_VERIFYING)) {
			account_io_completion(td, io_u, icd, idx, bytes);

			if (__should_check_rate(td, idx)) {
				td->rate_pending_usleep[idx] =
					(usec_for_io(td, idx) -
					 utime_since_now(&td->start));
			}

			if (__should_check_latency(td, idx)) {
				unsigned long lusec = utime_since(
					&io_u->issue_time, &icd->time);

				/* Linear increase and logarithmic decrease */
				if (lusec > td->o.shed_latency[idx]) {
					if (td->shed_count[idx] < MAX_SHED_COUNT) {
						td->shed_count[idx] +=
							(1 << SHED_FRAC_BITS);
					}
				} else if (td->shed_count[idx]) {
					td->shed_count[idx] -=
						get_used_bits(td->shed_count[idx]);
				}
				if (td->shed_count[idx]) {
					lusec = (lusec * td->shed_count[idx]) >>
							SHED_FRAC_BITS;
					if (lusec > td->rate_pending_usleep[idx]) {
						td->rate_pending_usleep[idx] =
							lusec;
					}
				}
			}
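
/*
 * The latency-shedding block above keeps a fixed-point congestion counter:
 * it grows linearly (by 1 << SHED_FRAC_BITS) whenever the completion latency
 * exceeds the per-direction target, shrinks roughly logarithmically via
 * get_used_bits(), and scales the extra sleep by count >> SHED_FRAC_BITS.
 * A standalone sketch of that update; SHED_FRAC_BITS, MAX_SHED_COUNT and
 * used_bits() are stand-in definitions with assumed values, not the
 * driver's real ones:
 */
#define SHED_FRAC_BITS	8				/* assumed precision */
#define MAX_SHED_COUNT	(64U << SHED_FRAC_BITS)		/* assumed cap */

static unsigned int used_bits(unsigned int v)		/* number of significant bits */
{
	unsigned int bits = 0;

	while (v) {
		bits++;
		v >>= 1;
	}
	return bits;
}

/* One completion: update the counter and return the scaled sleep in usec. */
static unsigned long shed_update(unsigned int *count, unsigned long lusec,
				 unsigned long latency_target)
{
	if (lusec > latency_target) {
		if (*count < MAX_SHED_COUNT)
			*count += 1U << SHED_FRAC_BITS;	/* linear increase */
	} else if (*count) {
		*count -= used_bits(*count);		/* logarithmic decrease */
	}

	if (*count)
		return (lusec * *count) >> SHED_FRAC_BITS;
	return 0;
}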