/*
 * Get the time to complete a unit of work on a particular cpu.
 * The minimum over CALIBRATE_RUNS runs is returned.
 */
static double calibrate_unit(unsigned char *data)
{
	unsigned long t, i, j, k;
	struct timeval tps;
	double tunit = 0.0;

	for (i = 0; i < CALIBRATE_RUNS; i++) {
		fio_gettime(&tps, NULL);
		/* scale for less variance */
		for (j = 0; j < CALIBRATE_SCALE; j++) {
			/* unit of work */
			for (k = 0; k < page_size; k++) {
				data[(k + j) % page_size] = k % 256;
				/*
				 * we won't see STOP here. this is to match
				 * the same statement in the profiling loop.
				 */
				if (ipc.status == IDLE_PROF_STATUS_PROF_STOP)
					return 0.0;
			}
		}

		t = utime_since_now(&tps);
		if (!t)
			continue;

		/* get the minimum time to complete CALIBRATE_SCALE units */
		if ((i == 0) || ((double)t < tunit))
			tunit = (double)t;
	}

	return tunit / CALIBRATE_SCALE;
}
void fio_time_init(void)
{
	int i;

	fio_clock_init();

	/*
	 * Check the granularity of the nanosleep function
	 */
	for (i = 0; i < 10; i++) {
		struct timeval tv;
		struct timespec ts;
		unsigned long elapsed;

		fio_gettime(&tv, NULL);
		ts.tv_sec = 0;
		ts.tv_nsec = 1000;

		nanosleep(&ts, NULL);
		elapsed = utime_since_now(&tv);

		if (elapsed > ns_granularity)
			ns_granularity = elapsed;
	}
}
void usec_sleep(struct thread_data *td, unsigned long usec)
{
	struct timespec req;
	struct timeval tv;

	do {
		unsigned long ts = usec;

		if (usec < ns_granularity) {
			usec_spin(usec);
			break;
		}

		ts = usec - ns_granularity;

		if (ts >= 1000000) {
			req.tv_sec = ts / 1000000;
			ts -= 1000000 * req.tv_sec;
		} else
			req.tv_sec = 0;

		req.tv_nsec = ts * 1000;
		fio_gettime(&tv, NULL);

		if (nanosleep(&req, NULL) < 0)
			break;

		ts = utime_since_now(&tv);
		if (ts >= usec)
			break;

		usec -= ts;
	} while (!td->terminate);
}
static void iolog_delay(struct thread_data *td, unsigned long delay)
{
	unsigned long usec = utime_since_now(&td->last_issue);
	unsigned long this_delay;

	if (delay < usec)
		return;

	delay -= usec;

	/*
	 * less than 100 usec delay, just regard it as noise
	 */
	if (delay < 100)
		return;

	while (delay && !td->terminate) {
		this_delay = delay;
		if (this_delay > 500000)
			this_delay = 500000;

		usec_sleep(td, this_delay);
		delay -= this_delay;
	}
}
/*
 * busy looping version for the last few usec
 */
void usec_spin(unsigned int usec)
{
	struct timeval start;

	fio_gettime(&start, NULL);
	while (utime_since_now(&start) < usec)
		nop;
}
/*
 * busy looping version for the last few usec
 */
uint64_t usec_spin(unsigned int usec)
{
	struct timeval start;
	uint64_t t;

	fio_gettime(&start, NULL);
	while ((t = utime_since_now(&start)) < usec)
		nop;

	return t;
}
static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
{
	enum fio_ddir odir = ddir ^ 1;
	struct timeval t;
	long usec;

	assert(ddir_rw(ddir));

	if (td->rate_pending_usleep[ddir] <= 0)
		return ddir;

	/*
	 * We have too much pending sleep in this direction. See if we
	 * should switch.
	 */
	if (td_rw(td) && td->o.rwmix[odir]) {
		/*
		 * Other direction does not have too much pending, switch
		 */
		if (td->rate_pending_usleep[odir] < 100000)
			return odir;

		/*
		 * Both directions have pending sleep. Sleep the minimum time
		 * and deduct from both.
		 */
		if (td->rate_pending_usleep[ddir] <=
			td->rate_pending_usleep[odir]) {
			usec = td->rate_pending_usleep[ddir];
		} else {
			usec = td->rate_pending_usleep[odir];
			ddir = odir;
		}
	} else
		usec = td->rate_pending_usleep[ddir];

	io_u_quiesce(td);

	fio_gettime(&t, NULL);
	usec_sleep(td, usec);
	usec = utime_since_now(&t);

	td->rate_pending_usleep[ddir] -= usec;

	odir = ddir ^ 1;
	if (td_rw(td) && __should_check_rate(td, odir))
		td->rate_pending_usleep[odir] -= usec;

	if (ddir_trim(ddir))
		return ddir;

	return ddir;
}
static void iolog_delay(struct thread_data *td, unsigned long delay)
{
	unsigned long usec = utime_since_now(&td->last_issue);

	if (delay < usec)
		return;

	delay -= usec;

	/*
	 * less than 100 usec delay, just regard it as noise
	 */
	if (delay < 100)
		return;

	usec_sleep(td, delay);
}
static uint64_t t_crc32(void)
{
	struct timeval s;
	uint64_t ret;
	void *buf;
	int i;

	buf = malloc(CHUNK);
	randomize_buf(buf, CHUNK, 0x8989);

	fio_gettime(&s, NULL);
	for (i = 0; i < NR_CHUNKS; i++)
		fio_crc32(buf, CHUNK);

	ret = utime_since_now(&s);
	free(buf);
	return ret;
}
/*
 * Check if we can bump the queue depth
 */
void lat_target_check(struct thread_data *td)
{
	uint64_t usec_window;
	uint64_t ios;
	double success_ios;

	usec_window = utime_since_now(&td->latency_ts);
	if (usec_window < td->o.latency_window)
		return;

	ios = ddir_rw_sum(td->io_blocks) - td->latency_ios;
	success_ios = (double) (ios - td->latency_failed) / (double) ios;
	success_ios *= 100.0;

	dprint(FD_RATE, "Success rate: %.2f%% (target %.2f%%)\n",
			success_ios, td->o.latency_percentile.u.f);

	if (success_ios >= td->o.latency_percentile.u.f)
		lat_target_success(td);
	else
		__lat_target_failed(td);
}
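/*
 * Hedged sketch, not part of fio: a standalone walk-through of the
 * success-ratio arithmetic lat_target_check() above performs once the
 * latency window has elapsed. The counts and target are made-up example
 * values, not values taken from fio.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long ios = 1000;		/* I/Os completed in this window */
	unsigned long long latency_failed = 12;	/* of those, over the latency target */
	double latency_percentile = 99.0;	/* required success percentage */
	double success_ios;

	success_ios = (double) (ios - latency_failed) / (double) ios;
	success_ios *= 100.0;

	/* 98.80% < 99.00%, so this window would count as a failure */
	printf("Success rate: %.2f%% (target %.2f%%)\n",
	       success_ios, latency_percentile);
	return 0;
}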
static uint64_t t_xxhash(void)
{
	void *state;
	struct timeval s;
	uint64_t ret;
	void *buf;
	int i;

	state = XXH32_init(0x8989);

	buf = malloc(CHUNK);
	randomize_buf(buf, CHUNK, 0x8989);

	fio_gettime(&s, NULL);
	for (i = 0; i < NR_CHUNKS; i++)
		XXH32_update(state, buf, CHUNK);

	XXH32_digest(state);
	ret = utime_since_now(&s);
	free(buf);
	return ret;
}
static uint64_t t_md5(void)
{
	uint32_t digest[4];
	struct fio_md5_ctx ctx = { .hash = digest };
	struct timeval s;
	uint64_t ret;
	void *buf;
	int i;

	fio_md5_init(&ctx);

	buf = malloc(CHUNK);
	randomize_buf(buf, CHUNK, 0x8989);

	fio_gettime(&s, NULL);
	for (i = 0; i < NR_CHUNKS; i++)
		fio_md5_update(&ctx, buf, CHUNK);

	ret = utime_since_now(&s);
	free(buf);
	return ret;
}
static uint64_t t_sha512(void)
{
	uint8_t sha[128];
	struct fio_sha512_ctx ctx = { .buf = sha };
	struct timeval s;
	uint64_t ret;
	void *buf;
	int i;

	fio_sha512_init(&ctx);

	buf = malloc(CHUNK);
	randomize_buf(buf, CHUNK, 0x8989);

	fio_gettime(&s, NULL);
	for (i = 0; i < NR_CHUNKS; i++)
		fio_sha512_update(&ctx, buf, CHUNK);

	ret = utime_since_now(&s);
	free(buf);
	return ret;
}
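/*
 * Hedged sketch, not part of fio: converting one of the t_*() timings above
 * into a throughput figure. CHUNK and NR_CHUNKS below are stand-in example
 * values for the test's real compile-time constants, and hash_mb_per_sec()
 * is a hypothetical helper, not a fio function.
 */
#include <stdio.h>
#include <stdint.h>

#define CHUNK		131072	/* assumed chunk size in bytes */
#define NR_CHUNKS	2048	/* assumed number of iterations */

/* usec is the elapsed time returned by e.g. t_md5() */
static double hash_mb_per_sec(uint64_t usec)
{
	double bytes = (double) CHUNK * NR_CHUNKS;

	if (!usec)
		return 0.0;

	/* bytes per microsecond equals MB/s (decimal megabytes) */
	return bytes / (double) usec;
}

int main(void)
{
	printf("%.2f MB/s\n", hash_mb_per_sec(250000));	/* a 250 ms run */
	return 0;
}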
void rate_throttle(struct thread_data *td, unsigned long time_spent,
		   unsigned int bytes)
{
	unsigned long usec_cycle;
	unsigned int bs;

	if (!td->o.rate && !td->o.rate_iops)
		return;

	if (td_rw(td))
		bs = td->o.rw_min_bs;
	else if (td_read(td))
		bs = td->o.min_bs[DDIR_READ];
	else
		bs = td->o.min_bs[DDIR_WRITE];

	usec_cycle = td->rate_usec_cycle * (bytes / bs);

	if (time_spent < usec_cycle) {
		unsigned long s = usec_cycle - time_spent;

		td->rate_pending_usleep += s;

		if (td->rate_pending_usleep >= 100000) {
			struct timeval t;

			fio_gettime(&t, NULL);
			usec_sleep(td, td->rate_pending_usleep);
			td->rate_pending_usleep -= utime_since_now(&t);
		}
	} else {
		long overtime = time_spent - usec_cycle;

		td->rate_pending_usleep -= overtime;
	}
}
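/*
 * Hedged sketch, not part of fio: the pacing arithmetic rate_throttle()
 * above applies, pulled out into a standalone example. rate_usec_cycle is
 * taken to be the microsecond budget for one minimum-sized block at the
 * target rate; the concrete numbers are made-up example values.
 */
#include <stdio.h>

int main(void)
{
	unsigned long rate_usec_cycle = 250;	/* one 4 KiB block every 250 usec */
	unsigned int bs = 4096;
	unsigned int bytes = 8192;		/* this completion covered two blocks... */
	unsigned long time_spent = 300;		/* ...but only took 300 usec */
	long rate_pending_usleep = 0;
	unsigned long usec_cycle = rate_usec_cycle * (bytes / bs);	/* 500 usec budget */

	if (time_spent < usec_cycle)
		rate_pending_usleep += usec_cycle - time_spent;	/* accrue 200 usec of sleep debt */
	else
		rate_pending_usleep -= time_spent - usec_cycle;	/* credit overtime against the debt */

	/* the real code only sleeps once at least 100000 usec have accrued */
	printf("pending sleep: %ld usec\n", rate_pending_usleep);
	return 0;
}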
uint64_t utime_since_genesis(void)
{
	return utime_since_now(&genesis);
}
static void io_completed(struct thread_data *td, struct io_u *io_u, struct io_completion_data *icd) { struct fio_file *f; dprint_io_u(io_u, "io complete"); td_io_u_lock(td); assert(io_u->flags & IO_U_F_FLIGHT); io_u->flags &= ~(IO_U_F_FLIGHT | IO_U_F_BUSY_OK); /* * Mark IO ok to verify */ if (io_u->ipo) { io_u->ipo->flags &= ~IP_F_IN_FLIGHT; write_barrier(); } td_io_u_unlock(td); if (ddir_sync(io_u->ddir)) { td->last_was_sync = 1; f = io_u->file; if (f) { f->first_write = -1ULL; f->last_write = -1ULL; } return; } td->last_was_sync = 0; td->last_ddir = io_u->ddir; if (!io_u->error && ddir_rw(io_u->ddir)) { unsigned int bytes = io_u->buflen - io_u->resid; const enum fio_ddir idx = io_u->ddir; const enum fio_ddir odx = io_u->ddir ^ 1; int ret; td->io_blocks[idx]++; td->this_io_blocks[idx]++; td->io_bytes[idx] += bytes; if (!(io_u->flags & IO_U_F_VER_LIST)) td->this_io_bytes[idx] += bytes; if (idx == DDIR_WRITE) { f = io_u->file; if (f) { if (f->first_write == -1ULL || io_u->offset < f->first_write) f->first_write = io_u->offset; if (f->last_write == -1ULL || ((io_u->offset + bytes) > f->last_write)) f->last_write = io_u->offset + bytes; } } if (ramp_time_over(td) && (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING)) { account_io_completion(td, io_u, icd, idx, bytes); if (__should_check_rate(td, idx)) { td->rate_pending_usleep[idx] = (usec_for_io(td, idx) - utime_since_now(&td->start)); } if (idx != DDIR_TRIM && __should_check_rate(td, odx)) td->rate_pending_usleep[odx] = (usec_for_io(td, odx) - utime_since_now(&td->start)); } icd->bytes_done[idx] += bytes; if (io_u->end_io) { ret = io_u->end_io(td, io_u); if (ret && !icd->error) icd->error = ret; } } else if (io_u->error) { icd->error = io_u->error; io_u_log_error(td, io_u); } if (icd->error) { enum error_type_bit eb = td_error_type(io_u->ddir, icd->error); if (!td_non_fatal_error(td, eb, icd->error)) return; /* * If there is a non_fatal error, then add to the error count * and clear all the errors. */ update_error_count(td, icd->error); td_clear_error(td); icd->error = 0; io_u->error = 0; } }
static void io_completed(struct thread_data *td, struct io_u *io_u, struct io_completion_data *icd) { struct fio_file *f; dprint_io_u(io_u, "io complete"); td_io_u_lock(td); assert(io_u->flags & IO_U_F_FLIGHT); io_u->flags &= ~(IO_U_F_FLIGHT | IO_U_F_BUSY_OK); td_io_u_unlock(td); if (ddir_sync(io_u->ddir)) { td->last_was_sync = 1; f = io_u->file; if (f) { f->first_write = -1ULL; f->last_write = -1ULL; } return; } td->last_was_sync = 0; td->last_ddir = io_u->ddir; if (!io_u->error && ddir_rw(io_u->ddir)) { unsigned int bytes = io_u->buflen - io_u->resid; const enum fio_ddir idx = io_u->ddir; const enum fio_ddir odx = io_u->ddir ^ 1; int ret; td->io_blocks[idx]++; td->this_io_blocks[idx]++; td->io_bytes[idx] += bytes; if (!(io_u->flags & IO_U_F_VER_LIST)) td->this_io_bytes[idx] += bytes; if (idx == DDIR_WRITE) { f = io_u->file; if (f) { if (f->first_write == -1ULL || io_u->offset < f->first_write) f->first_write = io_u->offset; if (f->last_write == -1ULL || ((io_u->offset + bytes) > f->last_write)) f->last_write = io_u->offset + bytes; } } if (ramp_time_over(td) && (td->runstate == TD_RUNNING || td->runstate == TD_VERIFYING)) { account_io_completion(td, io_u, icd, idx, bytes); if (__should_check_rate(td, idx)) { td->rate_pending_usleep[idx] = (usec_for_io(td, idx) - utime_since_now(&td->start)); } if (__should_check_latency(td, idx)) { unsigned long lusec = utime_since( &io_u->issue_time, &icd->time); /* Linear increase and logarithmic decrease */ if (lusec > td->o.shed_latency[idx]) { if (td->shed_count[idx] < MAX_SHED_COUNT ) { td->shed_count[idx] += (1<<SHED_FRAC_BITS); } } else if (td->shed_count[idx]) { td->shed_count[idx] -= get_used_bits(td->shed_count[idx]); } if (td->shed_count[idx]) { lusec = (lusec * td->shed_count[idx]) >> SHED_FRAC_BITS; if (lusec > td->rate_pending_usleep[idx]) { td->rate_pending_usleep[idx] = lusec; } } }
static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
{
	enum fio_ddir odir = ddir ^ 1;
	struct timeval t;
	long usec;

	assert(ddir_rw(ddir));

	if (td->rate_pending_usleep[ddir] <= 0)
		return ddir;

	/*
	 * We have too much pending sleep in this direction. See if we
	 * should switch.
	 */
	if (td_rw(td)) {
		/*
		 * Other direction does not have too much pending, switch
		 */
		if (td->rate_pending_usleep[odir] < 100000)
			return odir;

		/*
		 * Both directions have pending sleep. Sleep the minimum time
		 * and deduct from both.
		 */
		if (td->rate_pending_usleep[ddir] <=
			td->rate_pending_usleep[odir]) {
			usec = td->rate_pending_usleep[ddir];
		} else {
			usec = td->rate_pending_usleep[odir];
			ddir = odir;
		}
	} else
		usec = td->rate_pending_usleep[ddir];

	/*
	 * We are going to sleep, ensure that we flush anything pending as
	 * not to skew our latency numbers.
	 *
	 * Changed to only monitor 'in flight' requests here instead of the
	 * td->cur_depth, b/c td->cur_depth does not accurately represent
	 * io's that have been actually submitted to an async engine,
	 * and cur_depth is meaningless for sync engines.
	 */
	if (td->io_u_in_flight) {
		int fio_unused ret;

		ret = io_u_queued_complete(td, td->io_u_in_flight, NULL);
	}

	fio_gettime(&t, NULL);
	usec_sleep(td, usec);
	usec = utime_since_now(&t);

	td->rate_pending_usleep[ddir] -= usec;

	odir = ddir ^ 1;
	if (td_rw(td) && __should_check_rate(td, odir))
		td->rate_pending_usleep[odir] -= usec;

	if (ddir_trim(ddir))
		return ddir;

	return ddir;
}