/* this is the meat of the state machine. There is a list of * active operations structs, and as each one finishes the required * io it is moved to a list of finished operations. Once they have * all finished whatever stage they were in, they are given the chance * to restart and pick a different stage (read/write/random read etc) * * various timings are printed in between the stages, along with * thread synchronization if there are more than one threads. */ int worker(struct thread_info *t) { struct io_oper *oper; char *this_stage = NULL; struct timeval stage_time; int status = 0; int iteration = 0; int cnt; aio_setup(&t->io_ctx, 512); restart: if (num_threads > 1) { pthread_mutex_lock(&stage_mutex); threads_starting++; if (threads_starting == num_threads) { threads_ending = 0; gettimeofday(&global_stage_start_time, NULL); pthread_cond_broadcast(&stage_cond); } while (threads_starting != num_threads) pthread_cond_wait(&stage_cond, &stage_mutex); pthread_mutex_unlock(&stage_mutex); } if (t->active_opers) { this_stage = stage_name(t->active_opers->rw); gettimeofday(&stage_time, NULL); t->stage_mb_trans = 0; } cnt = 0; /* first we send everything through aio */ while(t->active_opers && (cnt < iterations || iterations == RUN_FOREVER)) { if (stonewall && threads_ending) { oper = t->active_opers; oper->stonewalled = 1; oper_list_del(oper, &t->active_opers); oper_list_add(oper, &t->finished_opers); } else { run_active_list(t, io_iter, max_io_submit); } cnt++; } if (latency_stats) print_latency(t); if (completion_latency_stats) print_completion_latency(t); /* then we wait for all the operations to finish */ oper = t->finished_opers; do { if (!oper) break; io_oper_wait(t, oper); oper = oper->next; } while(oper != t->finished_opers); /* then we do an fsync to get the timing for any future operations * right, and check to see if any of these need to get restarted */ oper = t->finished_opers; while(oper) { if (fsync_stages) fsync(oper->fd); t->stage_mb_trans += oper_mb_trans(oper); if (restart_oper(oper)) { oper_list_del(oper, &t->finished_opers); oper_list_add(oper, &t->active_opers); oper = t->finished_opers; continue; } oper = oper->next; if (oper == t->finished_opers) break; } if (t->stage_mb_trans && t->num_files > 0) { double seconds = time_since_now(&stage_time); fprintf(stderr, "thread %llu %s totals (%.2f MB/s) %.2f MB in %.2fs\n", (unsigned long long)(t - global_thread_info), this_stage, t->stage_mb_trans/seconds, t->stage_mb_trans, seconds); } if (num_threads > 1) { pthread_mutex_lock(&stage_mutex); threads_ending++; if (threads_ending == num_threads) { threads_starting = 0; pthread_cond_broadcast(&stage_cond); global_thread_throughput(t, this_stage); } while(threads_ending != num_threads) pthread_cond_wait(&stage_cond, &stage_mutex); pthread_mutex_unlock(&stage_mutex); } /* someone got restarted, go back to the beginning */ if (t->active_opers && (cnt < iterations || iterations == RUN_FOREVER)) { iteration++; goto restart; } /* finally, free all the ram */ while(t->finished_opers) { oper = t->finished_opers; oper_list_del(oper, &t->finished_opers); status = finish_oper(t, oper); } if (t->num_global_pending) { fprintf(stderr, "global num pending is %d\n", t->num_global_pending); } io_queue_release(t->io_ctx); return status; }
int main (int argc, char **argv) { ssize_t ret_size; struct stat st; int ret, flags; int part_request; long long this_time; double part_min, part_max, time_min, time_max; double time_sum, time_sum2, time_mdev, time_avg; double part_sum, part_sum2, part_mdev, part_avg; long long time_now, time_next, period_deadline; setvbuf(stdout, NULL, _IOLBF, 0); parse_options(argc, argv); interval_ts.tv_sec = interval / 1000000; interval_ts.tv_nsec = (interval % 1000000) * 1000; if (!size) size = default_size; if (size <= 0) errx(1, "request size must be greather than zero"); #ifdef MAX_RW_COUNT if (size > MAX_RW_COUNT) warnx("this platform supports requests %u bytes at most", MAX_RW_COUNT); #endif if (wsize) temp_wsize = wsize; else if (size > temp_wsize) temp_wsize = size; flags = O_RDONLY; #if !defined(HAVE_POSIX_FADVICE) && !defined(HAVE_NOCACHE_IO) # if defined(HAVE_DIRECT_IO) direct |= !cached; # else if (!cached && !write_test) { warnx("non-cached read I/O not supported by this platform"); warnx("you can use write I/O to get reliable results"); cached = 1; } # endif #endif if (write_test) { flags = O_RDWR; make_request = do_pwrite; } if (async) aio_setup(); if (direct) #ifdef HAVE_DIRECT_IO flags |= O_DIRECT; #else errx(1, "direct I/O not supported by this platform"); #endif #ifdef __MINGW32__ flags |= O_BINARY; #endif if (stat(path, &st)) err(2, "stat \"%s\" failed", path); if (!S_ISDIR(st.st_mode) && write_test && write_test < 3) errx(2, "think twice, then use -WWW to shred this target"); if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode)) { if (S_ISDIR(st.st_mode)) st.st_size = offset + temp_wsize; parse_device(st.st_dev); } else if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) { fd = open(path, flags); if (fd < 0) err(2, "failed to open \"%s\"", path); if (get_device_size(fd, &st)) { if (!S_ISCHR(st.st_mode)) err(2, "block get size ioctl failed"); st.st_size = offset + temp_wsize; fstype = "character"; device = "device"; } else { device_size = st.st_size; fstype = "block"; device = "device "; } if (!cached && write_test && fdatasync(fd)) { warnx("fdatasync not supported by \"%s\", " "enable cached requests", path); cached = 1; } } else { errx(2, "unsupported destination: \"%s\"", path); } if (wsize > st.st_size || offset > st.st_size - wsize) errx(2, "target is too small for this"); if (!wsize) wsize = st.st_size - offset; if (size > wsize) errx(2, "request size is too big for this target"); ret = posix_memalign(&buf, 0x1000, size); if (ret) errx(2, "buffer allocation failed"); random_memory(buf, size); if (S_ISDIR(st.st_mode)) { fd = create_temp(path, "ioping.tmp"); if (fd < 0) err(2, "failed to create temporary file at \"%s\"", path); if (keep_file) { if (fstat(fd, &st)) err(2, "fstat at \"%s\" failed", path); if (st.st_size >= offset + wsize) #ifndef __MINGW32__ if (st.st_blocks >= (st.st_size + 511) / 512) #endif goto skip_preparation; } for (woffset = 0 ; woffset < wsize ; woffset += ret_size) { ret_size = size; if (woffset + ret_size > wsize) ret_size = wsize - woffset; if (woffset) random_memory(buf, ret_size); ret_size = pwrite(fd, buf, ret_size, offset + woffset); if (ret_size <= 0) err(2, "preparation write failed"); } skip_preparation: if (fsync(fd)) err(2, "fsync failed"); } else if (S_ISREG(st.st_mode)) { fd = open(path, flags); if (fd < 0) err(2, "failed to open \"%s\"", path); } if (!cached) { #ifdef HAVE_POSIX_FADVICE ret = posix_fadvise(fd, offset, wsize, POSIX_FADV_RANDOM); if (ret) err(2, "fadvise failed"); #endif #ifdef HAVE_NOCACHE_IO ret = fcntl(fd, F_NOCACHE, 1); if (ret) err(2, "fcntl nocache failed"); #endif } srandom(now()); if (deadline) deadline += now(); set_signal(); request = 0; woffset = 0; part_request = 0; part_min = time_min = LLONG_MAX; part_max = time_max = LLONG_MIN; part_sum = time_sum = 0; part_sum2 = time_sum2 = 0; time_now = now(); period_deadline = time_now + period_time; while (!exiting) { request++; part_request++; if (randomize) woffset = random() % (wsize / size) * size; #ifdef HAVE_POSIX_FADVICE if (!cached) { ret = posix_fadvise(fd, offset + woffset, size, POSIX_FADV_DONTNEED); if (ret) err(3, "fadvise failed"); } #endif if (write_test) shake_memory(buf, size); this_time = now(); ret_size = make_request(fd, buf, size, offset + woffset); if (ret_size < 0) { if (errno != EINTR) err(3, "request failed"); } else if (ret_size < size) warnx("request returned less than expected: %zu", ret_size); else if (ret_size > size) errx(3, "request returned more than expected: %zu", ret_size); time_now = now(); this_time = time_now - this_time; time_next = time_now + interval; part_sum += this_time; part_sum2 += this_time * this_time; if (this_time < part_min) part_min = this_time; if (this_time > part_max) part_max = this_time; if (!quiet) { print_size(ret_size); printf(" %s %s (%s %s", write_test ? "to" : "from", path, fstype, device); if (device_size) print_size(device_size); printf("): request=%d time=", request); print_time(this_time); printf("\n"); } if ((period_request && (part_request >= period_request)) || (period_time && (time_next >= period_deadline))) { part_avg = part_sum / part_request; part_mdev = sqrt(part_sum2 / part_request - part_avg * part_avg); printf("%d %.0f %.0f %.0f %.0f %.0f %.0f %.0f\n", part_request, part_sum, 1000000. * part_request / part_sum, 1000000. * part_request * size / part_sum, part_min, part_avg, part_max, part_mdev); time_sum += part_sum; time_sum2 += part_sum2; if (part_min < time_min) time_min = part_min; if (part_max > time_max) time_max = part_max; part_min = LLONG_MAX; part_max = LLONG_MIN; part_sum = part_sum2 = 0; part_request = 0; period_deadline = time_now + period_time; } if (!randomize) { woffset += size; if (woffset + size > wsize) woffset = 0; } if (exiting) break; if (stop_at_request && request >= stop_at_request) break; if (deadline && time_next >= deadline) break; if (interval) nanosleep(&interval_ts, NULL); } time_sum += part_sum; time_sum2 += part_sum2; if (part_min < time_min) time_min = part_min; if (part_max > time_max) time_max = part_max; time_avg = time_sum / request; time_mdev = sqrt(time_sum2 / request - time_avg * time_avg); if (batch_mode) { printf("%d %.0f %.0f %.0f %.0f %.0f %.0f %.0f\n", request, time_sum, 1000000. * request / time_sum, 1000000. * request * size / time_sum, time_min, time_avg, time_max, time_mdev); } else if (!quiet || (!period_time && !period_request)) { printf("\n--- %s (%s %s", path, fstype, device); if (device_size) print_size(device_size); printf(") ioping statistics ---\n"); print_int(request); printf(" requests completed in "); print_time(time_sum); printf(", "); print_size((long long)request * size); printf(" %s, ", write_test ? "written" : "read"); print_int(1000000. * request / time_sum); printf(" iops, "); print_size(1000000. * request * size / time_sum); printf("/s\n"); printf("min/avg/max/mdev = "); print_time(time_min); printf(" / "); print_time(time_avg); printf(" / "); print_time(time_max); printf(" / "); print_time(time_mdev); printf("\n"); } return 0; }