/*
 * Print a one-line throughput summary for a single operation to stderr:
 * the stage name for oper->rw, the file name, the transfer rate, the
 * amount transferred and the elapsed time.
 *
 * The elapsed time is whatever time_since_now() reports for
 * oper->start_time and the amount is whatever oper_mb_trans() reports;
 * the rate is simply their quotient.
 */
static void print_time(struct io_oper *oper)
{
	/* sample the clock first, then the transfer total, as before */
	const double elapsed = time_since_now(&oper->start_time);
	const double megabytes = oper_mb_trans(oper);

	fprintf(stderr, "%s on %s (%.2f MB/s) %.2f MB in %.2fs\n",
		stage_name(oper->rw), oper->file_name,
		megabytes / elapsed, megabytes, elapsed);
}
/* this is the meat of the state machine. There is a list of * active operations structs, and as each one finishes the required * io it is moved to a list of finished operations. Once they have * all finished whatever stage they were in, they are given the chance * to restart and pick a different stage (read/write/random read etc) * * various timings are printed in between the stages, along with * thread synchronization if there are more than one threads. */ int worker(struct thread_info *t) { struct io_oper *oper; char *this_stage = NULL; struct timeval stage_time; int status = 0; int iteration = 0; int cnt; aio_setup(&t->io_ctx, 512); restart: if (num_threads > 1) { pthread_mutex_lock(&stage_mutex); threads_starting++; if (threads_starting == num_threads) { threads_ending = 0; gettimeofday(&global_stage_start_time, NULL); pthread_cond_broadcast(&stage_cond); } while (threads_starting != num_threads) pthread_cond_wait(&stage_cond, &stage_mutex); pthread_mutex_unlock(&stage_mutex); } if (t->active_opers) { this_stage = stage_name(t->active_opers->rw); gettimeofday(&stage_time, NULL); t->stage_mb_trans = 0; } cnt = 0; /* first we send everything through aio */ while(t->active_opers && (cnt < iterations || iterations == RUN_FOREVER)) { if (stonewall && threads_ending) { oper = t->active_opers; oper->stonewalled = 1; oper_list_del(oper, &t->active_opers); oper_list_add(oper, &t->finished_opers); } else { run_active_list(t, io_iter, max_io_submit); } cnt++; } if (latency_stats) print_latency(t); if (completion_latency_stats) print_completion_latency(t); /* then we wait for all the operations to finish */ oper = t->finished_opers; do { if (!oper) break; io_oper_wait(t, oper); oper = oper->next; } while(oper != t->finished_opers); /* then we do an fsync to get the timing for any future operations * right, and check to see if any of these need to get restarted */ oper = t->finished_opers; while(oper) { if (fsync_stages) fsync(oper->fd); t->stage_mb_trans += 
oper_mb_trans(oper); if (restart_oper(oper)) { oper_list_del(oper, &t->finished_opers); oper_list_add(oper, &t->active_opers); oper = t->finished_opers; continue; } oper = oper->next; if (oper == t->finished_opers) break; } if (t->stage_mb_trans && t->num_files > 0) { double seconds = time_since_now(&stage_time); fprintf(stderr, "thread %llu %s totals (%.2f MB/s) %.2f MB in %.2fs\n", (unsigned long long)(t - global_thread_info), this_stage, t->stage_mb_trans/seconds, t->stage_mb_trans, seconds); } if (num_threads > 1) { pthread_mutex_lock(&stage_mutex); threads_ending++; if (threads_ending == num_threads) { threads_starting = 0; pthread_cond_broadcast(&stage_cond); global_thread_throughput(t, this_stage); } while(threads_ending != num_threads) pthread_cond_wait(&stage_cond, &stage_mutex); pthread_mutex_unlock(&stage_mutex); } /* someone got restarted, go back to the beginning */ if (t->active_opers && (cnt < iterations || iterations == RUN_FOREVER)) { iteration++; goto restart; } /* finally, free all the ram */ while(t->finished_opers) { oper = t->finished_opers; oper_list_del(oper, &t->finished_opers); status = finish_oper(t, oper); } if (t->num_global_pending) { fprintf(stderr, "global num pending is %d\n", t->num_global_pending); } io_queue_release(t->io_ctx); return status; }
int main(int ac, char **av) { int rwfd; int i; int j; int c; off_t file_size = 1 * 1024 * 1024 * 1024; int first_stage = WRITE; struct io_oper *oper; int status = 0; int num_files = 0; int open_fds = 0; struct thread_info *t; page_size_mask = getpagesize() - 1; while(1) { c = getopt(ac, av, "a:b:c:C:m:s:r:d:i:I:o:t:lLnhOSxvu"); if (c < 0) break; switch(c) { case 'a': page_size_mask = parse_size(optarg, 1024); page_size_mask--; break; case 'c': num_contexts = atoi(optarg); break; case 'C': context_offset = parse_size(optarg, 1024 * 1024); case 'b': max_io_submit = atoi(optarg); break; case 's': file_size = parse_size(optarg, 1024 * 1024); break; case 'd': depth = atoi(optarg); break; case 'r': rec_len = parse_size(optarg, 1024); break; case 'i': io_iter = atoi(optarg); break; case 'I': iterations = atoi(optarg); break; case 'n': fsync_stages = 0; break; case 'l': latency_stats = 1; break; case 'L': completion_latency_stats = 1; break; case 'm': if (!strcmp(optarg, "shm")) { fprintf(stderr, "using ipc shm\n"); use_shm = USE_SHM; } else if (!strcmp(optarg, "shmfs")) { fprintf(stderr, "using /dev/shm for buffers\n"); use_shm = USE_SHMFS; } break; case 'o': i = atoi(optarg); stages |= 1 << i; fprintf(stderr, "adding stage %s\n", stage_name(i)); break; case 'O': o_direct = O_DIRECT; break; case 'S': o_sync = O_SYNC; break; case 't': num_threads = atoi(optarg); break; case 'x': stonewall = 0; break; case 'u': unlink_files = 1; break; case 'v': verify = 1; break; case 'h': default: print_usage(); exit(1); } } /* * make sure we don't try to submit more ios than we have allocated * memory for */ if (depth < io_iter) { io_iter = depth; fprintf(stderr, "dropping io_iter to %d\n", io_iter); } if (optind >= ac) { print_usage(); exit(1); } num_files = ac - optind; if (num_threads > (num_files * num_contexts)) { num_threads = num_files * num_contexts; fprintf(stderr, "dropping thread count to the number of contexts %d\n", num_threads); } t = malloc(num_threads * sizeof(*t)); if 
(!t) { perror("malloc"); exit(1); } global_thread_info = t; /* by default, allow a huge number of iocbs to be sent towards * io_submit */ if (!max_io_submit) max_io_submit = num_files * io_iter * num_contexts; /* * make sure we don't try to submit more ios than max_io_submit allows */ if (max_io_submit < io_iter) { io_iter = max_io_submit; fprintf(stderr, "dropping io_iter to %d\n", io_iter); } if (!stages) { stages = (1 << WRITE) | (1 << READ) | (1 << RREAD) | (1 << RWRITE); } else { for (i = 0 ; i < LAST_STAGE; i++) { if (stages & (1 << i)) { first_stage = i; fprintf(stderr, "starting with %s\n", stage_name(i)); break; } } } if (file_size < num_contexts * context_offset) { fprintf(stderr, "file size %Lu too small for %d contexts\n", (unsigned long long)file_size, num_contexts); exit(1); } fprintf(stderr, "file size %LuMB, record size %luKB, depth %d, ios per iteration %d\n", (unsigned long long)file_size / (1024 * 1024), rec_len / 1024, depth, io_iter); fprintf(stderr, "max io_submit %d, buffer alignment set to %luKB\n", max_io_submit, (page_size_mask + 1)/1024); fprintf(stderr, "threads %d files %d contexts %d context offset %LuMB verification %s\n", num_threads, num_files, num_contexts, (unsigned long long)context_offset / (1024 * 1024), verify ? 
"on" : "off"); /* open all the files and do any required setup for them */ for (i = optind ; i < ac ; i++) { int thread_index; for (j = 0 ; j < num_contexts ; j++) { thread_index = open_fds % num_threads; open_fds++; rwfd = open(av[i], O_CREAT | O_RDWR | o_direct | o_sync, 0600); assert(rwfd != -1); oper = create_oper(rwfd, first_stage, j * context_offset, file_size - j * context_offset, rec_len, depth, io_iter, av[i]); if (!oper) { fprintf(stderr, "error in create_oper\n"); exit(-1); } oper_list_add(oper, &t[thread_index].active_opers); t[thread_index].num_files++; } } if (setup_shared_mem(num_threads, num_files * num_contexts, depth, rec_len, max_io_submit)) { exit(1); } for (i = 0 ; i < num_threads ; i++) { if (setup_ious(&t[i], t[i].num_files, depth, rec_len, max_io_submit)) exit(1); } if (num_threads > 1){ printf("Running multi thread version num_threads:%d\n", num_threads); run_workers(t, num_threads); } else { printf("Running single thread version \n"); status = worker(t); } if (unlink_files) { for (i = optind ; i < ac ; i++) { printf("Cleaning up file %s \n", av[i]); unlink(av[i]); } } if (status) { exit(1); } return status; }
void Kernel::collectExtraOptions() { for (const auto& o : m_extra_options) { typedef boost::tokenizer<boost::char_separator<char>> tokenizer; // if we don't have --, we're not an option we // even care about if (!boost::algorithm::find_first(o, "--")) continue; // Find the dimensions listed and put them on the id list. boost::char_separator<char> equal("="); boost::char_separator<char> dot("."); // boost::erase_all(o, " "); // Wipe off spaces tokenizer option_tokens(o, equal); std::vector<std::string> option_split; for (auto ti = option_tokens.begin(); ti != option_tokens.end(); ++ti) option_split.push_back(boost::lexical_cast<std::string>(*ti)); if (!(option_split.size() == 2)) { std::ostringstream oss; oss << "option '" << o << "' did not split correctly. Is it in the form --readers.las.option=foo?"; throw app_usage_error(oss.str()); } std::string option_value(option_split[1]); std::string stage_value(option_split[0]); boost::algorithm::erase_all(stage_value, "--"); tokenizer name_tokens(stage_value, dot); std::vector<std::string> stage_values; for (auto ti = name_tokens.begin(); ti != name_tokens.end(); ++ti) { stage_values.push_back(*ti); } std::string option_name = *stage_values.rbegin(); std::ostringstream stage_name_ostr; bool bFirst(true); for (auto s = stage_values.begin(); s != stage_values.end()-1; ++s) { auto s2 = boost::algorithm::erase_all_copy(*s, " "); if (bFirst) { bFirst = false; } else stage_name_ostr <<"."; stage_name_ostr << s2; } std::string stage_name(stage_name_ostr.str()); auto found = m_extra_stage_options.find(stage_name); if (found == m_extra_stage_options.end()) m_extra_stage_options.insert(std::make_pair(stage_name, Option(option_name, option_value, ""))); else found->second.add(Option(option_name, option_value, "")); } }