int main_index(int argc, char** argv) { if (argc == 2) { help_index(argv); return 1; } string rocksdb_name; string gcsa_name; string xg_name; // Where should we import haplotype phasing paths from, if anywhere? string vcf_name; // This maps from graph path name (FASTA name) to VCF contig name map<string,string> path_to_vcf; vector<string> dbg_names; int kmer_size = 0; bool path_only = false; int edge_max = 0; int kmer_stride = 1; int prune_kb = -1; bool store_graph = false; bool dump_index = false; bool describe_index = false; bool show_progress = false; bool set_kmer_size = false; bool path_layout = false; bool store_alignments = false; bool store_node_alignments = false; bool store_mappings = false; bool allow_negs = false; bool compact = false; bool dump_alignments = false; int doubling_steps = 3; bool verify_index = false; bool forward_only = false; size_t size_limit = 200; // in gigabytes bool store_threads = false; // use gPBWT to store paths bool discard_overlaps = false; int c; optind = 2; // force optind past command positional argument while (true) { static struct option long_options[] = { //{"verbose", no_argument, &verbose_flag, 1}, {"db-name", required_argument, 0, 'd'}, {"kmer-size", required_argument, 0, 'k'}, {"edge-max", required_argument, 0, 'e'}, {"kmer-stride", required_argument, 0, 'j'}, {"store-graph", no_argument, 0, 's'}, {"store-alignments", no_argument, 0, 'a'}, {"dump-alignments", no_argument, 0, 'A'}, {"store-mappings", no_argument, 0, 'm'}, {"dump", no_argument, 0, 'D'}, {"metadata", no_argument, 0, 'M'}, {"set-kmer", no_argument, 0, 'S'}, {"threads", required_argument, 0, 't'}, {"progress", no_argument, 0, 'p'}, {"prune", required_argument, 0, 'P'}, {"path-layout", no_argument, 0, 'L'}, {"compact", no_argument, 0, 'C'}, {"allow-negs", no_argument, 0, 'n'}, {"gcsa-name", required_argument, 0, 'g'}, {"xg-name", required_argument, 0, 'x'}, {"vcf-phasing", required_argument, 0, 'v'}, {"rename", required_argument, 0, 'r'}, {"verify-index", no_argument, 0, 'V'}, {"forward-only", no_argument, 0, 'F'}, {"size-limit", no_argument, 0, 'Z'}, {"path-only", no_argument, 0, 'O'}, {"store-threads", no_argument, 0, 'T'}, {"node-alignments", no_argument, 0, 'N'}, {"dbg-in", required_argument, 0, 'i'}, {"discard-overlaps", no_argument, 0, 'o'}, {0, 0, 0, 0} }; int option_index = 0; c = getopt_long (argc, argv, "d:k:j:pDshMt:b:e:SP:LmaCnAg:X:x:v:r:VFZ:Oi:TNo", long_options, &option_index); // Detect the end of the options. if (c == -1) break; switch (c) { case 'd': rocksdb_name = optarg; break; case 'x': xg_name = optarg; break; case 'v': vcf_name = optarg; break; case 'r': { // Parse the rename old=new string key_value(optarg); auto found = key_value.find('='); if (found == string::npos || found == 0 || found + 1 == key_value.size()) { cerr << "error:[vg construct] could not parse rename " << key_value << endl; exit(1); } // Parse out the two parts string vcf_contig = key_value.substr(0, found); string graph_contig = key_value.substr(found + 1); // Add the name mapping path_to_vcf[graph_contig] = vcf_contig; } break; case 'P': prune_kb = atoi(optarg); break; case 'k': kmer_size = atoi(optarg); break; case 'O': path_only = true; break; case 'e': edge_max = atoi(optarg); break; case 'j': kmer_stride = atoi(optarg); break; case 'p': show_progress = true; break; case 'D': dump_index = true; break; case 'M': describe_index = true; break; case 'L': path_layout = true; break; case 'S': set_kmer_size = true; break; case 's': store_graph = true; break; case 'a': store_alignments = true; break; case 'A': dump_alignments = true; break; case 'm': store_mappings = true; break; case 'n': allow_negs = true; break; case 'C': compact = true; break; case 't': omp_set_num_threads(atoi(optarg)); break; case 'g': gcsa_name = optarg; break; case 'V': verify_index = true; break; case 'i': dbg_names.push_back(optarg); break; case 'F': forward_only = true; break; case 'X': doubling_steps = atoi(optarg); break; case 'Z': size_limit = atoi(optarg); break; case 'T': store_threads = true; break; case 'o': discard_overlaps = true; break; case 'N': store_node_alignments = true; break; case 'h': case '?': help_index(argv); exit(1); break; default: abort (); } } if (edge_max == 0) edge_max = kmer_size + 1; vector<string> file_names; while (optind < argc) { string file_name = get_input_file_name(optind, argc, argv); file_names.push_back(file_name); } if (file_names.size() <= 0 && dbg_names.empty()){ //cerr << "No graph provided for indexing. Please provide a .vg file or GCSA2-format deBruijn graph to index." << endl; //return 1; } if (kmer_size == 0 && !gcsa_name.empty() && dbg_names.empty()) { // gcsa doesn't do anything if we tell it a kmer size of 0. cerr << "error:[vg index] kmer size for GCSA2 index must be >0" << endl; return 1; } if (kmer_size < 0) { cerr << "error:[vg index] kmer size cannot be negative" << endl; return 1; } if (kmer_stride <= 0) { // kmer strides of 0 (or negative) are silly. cerr << "error:[vg index] kmer stride must be positive and nonzero" << endl; return 1; } if (!xg_name.empty()) { // We need to build an xg index // We'll fill this with the opened VCF file if we need one. vcflib::VariantCallFile variant_file; if (!vcf_name.empty()) { // There's a VCF we should load haplotype info from variant_file.open(vcf_name); if (!variant_file.is_open()) { cerr << "error:[vg index] could not open " << vcf_name << endl; return 1; } else if (show_progress) { cerr << "Opened variant file " << vcf_name << endl; } } // We want to siphon off the "_alt_<variant>_<number>" paths from "vg // construct -a" and not index them, and use them for creating haplotype // threads. // TODO: a better way to store path metadata map<string, Path> alt_paths; // This is matched against the entire string. regex is_alt("_alt_.+_[0-9]+"); if (file_names.empty()) { // VGset or something segfaults when we feed it no graphs. cerr << "error:[vg index] at least one graph is required to build an xg index" << endl; return 1; } // store the graphs VGset graphs(file_names); // Turn into an XG index, except for the alt paths which we pull out and load into RAM instead. xg::XG index; graphs.to_xg(index, store_threads, is_alt, alt_paths); if (show_progress) { cerr << "Built base XG index" << endl; } // We're going to collect all the phase threads as XG threads (which // aren't huge like Protobuf Paths), and then insert them all into xg in // a batch, for speed. This will take a lot of memory (although not as // much as a real vg::Paths index or vector<Path> would) vector<xg::XG::thread_t> all_phase_threads; if (variant_file.is_open()) { // Now go through and add the varaints. // How many phases are there? size_t num_samples = variant_file.sampleNames.size(); // And how many phases? size_t num_phases = num_samples * 2; for (size_t path_rank = 1; path_rank <= index.max_path_rank(); path_rank++) { // Find all the reference paths and loop over them. We'll just // assume paths that don't start with "_" might appear in the // VCF. We need to use the xg path functions, since we didn't // load up the whole vg graph. // What path is this? string path_name = index.path_name(path_rank); // Convert to VCF space if applicable string vcf_contig_name = path_to_vcf.count(path_name) ? path_to_vcf[path_name] : path_name; if (show_progress) { cerr << "Processing path " << path_name << " as VCF contig " << vcf_contig_name << endl; } // We already know it's not a variant's alt, since those were // removed, so it might be a primary contig. // How many bases is it? size_t path_length = index.path_length(path_name); // We're going to extract it and index it, so we don't keep // making queries against it for every sample. PathIndex path_index(index.path(path_name)); // Allocate some threads to store phase threads vector<xg::XG::thread_t> active_phase_threads{num_phases}; // We need to remember how many paths of a particular phase have // already been generated. vector<int> saved_phase_paths(num_phases, 0); // What's the first reference position after the last variant? vector<size_t> nonvariant_starts(num_phases, 0); // Completed ones just get dumped into the index auto finish_phase = [&](size_t phase_number) { // We have finished a phase (because an unphased variant // came up or we ran out of variants); dump it into the // index under a name and make a new Path for that phase. // Find where this path is in our vector xg::XG::thread_t& to_save = active_phase_threads[phase_number]; if (to_save.size() > 0) { // Only actually do anything if we put in some mappings. // Count this thread from this phase as being saved. saved_phase_paths[phase_number]++; // We don't tie threads from a pahse together in the // index yet. // Copy the thread over to our batch that we GPBWT all // at once, exploiting the fact that VCF-derived graphs // are DAGs. all_phase_threads.push_back(to_save); // Clear it out for re-use to_save.clear(); } }; // We need a way to convert Mappings to ThreadMappings // TODO: add a converting constructor? auto mapping_to_thread_mapping = [](const Mapping& mapping) { xg::XG::ThreadMapping thread_mapping; thread_mapping.node_id = mapping.position().node_id(); thread_mapping.is_reverse = mapping.position().is_reverse(); return thread_mapping; }; // And NodeSides to thread mappings auto node_side_to_thread_mapping = [](const NodeSide& side) { xg::XG::ThreadMapping thread_mapping; thread_mapping.node_id = side.node; thread_mapping.is_reverse = side.is_end; return thread_mapping; }; // We need a way to dump mappings into pahse threads. The // mapping edits and rank and offset info will be ignored; the // Mapping just represents an oriented node traversal. auto append_mapping = [&](size_t phase_number, const xg::XG::ThreadMapping& mapping) { // Find the path to add to xg::XG::thread_t& to_extend = active_phase_threads[phase_number]; // See if the edge we need to follow exists if (to_extend.size() > 0) { // If there's a previous mapping, go find it const xg::XG::ThreadMapping& previous = to_extend[to_extend.size() - 1]; // Break out the IDs and flags we need to check for the edge const int64_t& last_node = previous.node_id; const bool& last_from_start = previous.is_reverse; const int64_t& new_node = mapping.node_id; const bool& new_to_end = mapping.is_reverse; if (!index.has_edge(xg::make_edge(last_node, last_from_start, new_node, new_to_end))) { // We can't have a thread take this edge (or an // equivalent). Split and emit the current mappings // and start a new path. #ifdef debug cerr << "warning:[vg index] phase " << phase_number << " wants edge " << last_node << (last_from_start ? "L" : "R") << " - " << new_node << (new_to_end ? "R" : "L") << " which does not exist. Splitting!" << endl; #endif finish_phase(phase_number); } } // Add the ThreadMapping active_phase_threads[phase_number].push_back(mapping); }; // We need an easy way to append any reference mappings from the // last variant up until a certain position (which may be past // the end of the entire reference path). auto append_reference_mappings_until = [&](size_t phase_number, size_t end) { // We need to look and add in the mappings to the // intervening reference nodes from the last variant, if // any. For which we need access to the last variant's past- // the-end reference position. size_t ref_pos = nonvariant_starts[phase_number]; // Get an iterator to the next node visit to add PathIndex::iterator next_to_add = path_index.find_position(ref_pos); while(ref_pos < end && next_to_add != path_index.end()) { // While there is intervening reference // sequence, add it to our phase. // What node side is the node that covers here? NodeSide ref_side = next_to_add->second; // Stick it in the phase path append_mapping(phase_number, node_side_to_thread_mapping(ref_side)); // Advance to what's after that mapping, pulling node // length from the path index ref_pos += path_index.node_length(next_to_add); // Budge the iterator over so we don't need to do // another tree query. next_to_add++; } nonvariant_starts[phase_number] = ref_pos; }; // We also have another function to handle each variant as it comes in. auto handle_variant = [&](vcflib::Variant& variant) { // So we have a variant // Grab its id, or make one by hashing stuff if it doesn't // have an ID. string var_name = make_variant_id(variant); // We have alt paths like _alt_<var_name>_0 ... // _alt_<var_name>_n. Up to one of them may be missing, in // which case it represents a 0-length path that's just the // edge from the node before the variable part of the // variant to the node after. // If we take the ref allele when the ref path is missing, // we don't care! We'll make mappings through here when we // hit the next nonreference variant or the end of the // contig and add the reference matches. // If we take an allele that's present, we go up through the // end of the ref node that's before it, and then visit the // allele. // If we take an alt allele when its path is missing, we go // up to the end of the ref node before it, and then mark us // as complete through there plus the length of the nodes // along the ref path for the variant (which must be // nonempty). for (int sample_number = 0; sample_number < num_samples; sample_number++) { // For each sample // What sample is it? string& sample_name = variant_file.sampleNames[sample_number]; // Parse it out and see if it's phased. string genotype = variant.getGenotype(sample_name); // Find the phasing bar auto bar_pos = genotype.find('|'); if (bar_pos == string::npos || bar_pos == 0 || bar_pos + 1 >= genotype.size()) { // If it isn't phased, or we otherwise don't like // it, we need to break phasing paths. for (int phase_offset = 0; phase_offset < 2; phase_offset++) { // For each of the two phases for the sample // Remember where the end of the last variant was auto cursor = nonvariant_starts[sample_number * 2 + phase_offset]; // Make the phase thread reference up to the // start of this variant. Doesn't have to be // into the variable region. append_reference_mappings_until(sample_number * 2 + phase_offset, variant.position); // Finish the phase thread and start a new one finish_phase(sample_number * 2 + phase_offset); // Walk the cursor back so we repeat the // reference segment, which we need to do in // order to properly handle zero-length alleles // at the ends of phase blocks. nonvariant_starts[sample_number * 2 + phase_offset] = cursor; // TODO: we still can't handle deletions // adjacent to SNPs where phasing gets lost. We // have to have intervening reference bases. But // that's a defect of the data model. } } // If it is phased, parse out the two alleles and handle // each separately. vector<string> alt_indices({genotype.substr(0, bar_pos), genotype.substr(bar_pos + 1)}); for (int phase_offset = 0; phase_offset < 2; phase_offset++) { // Handle each phase and its alt string& alt_string = alt_indices[phase_offset]; if (alt_string == ".") { // This is a missing data call. Skip it. TODO: // that means we'll just treat it like a // reference call, when really we should break // the haplotype here and not touch either alt. // But if the reference path is empty, and we // don't have a handy alt, we don't necessarily // know where the site actually *is*, so we // can't break phasing. What we should really do // is iterate alt numbers until we find one, but // that's going to be slow. continue; } // Otherwise it must be a proper number reference. // Parse it. int alt_index = stoi(alt_string); if (alt_index != 0) { // If this sample doesn't take the reference // path at this variant, we need to actually go // through it and not just call // append_reference_mappings_until // We need to fill this in with the first // reference position covered by the ref allele // of this site, as actually represented in the // path for the ref alt (i.e. after clipping // fixed bases off the start and end in the // VCF). This is the base after the insertion // for pure insertions. size_t first_ref_base = 0; // We need to look for the ref path for this variant string ref_path_name = "_alt_" + var_name + "_0"; auto ref_path_iter = alt_paths.find(ref_path_name); // We also need to look for the path for this alt of this // variant. string alt_path_name = "_alt_" + var_name + "_" + to_string(alt_index); auto alt_path_iter = alt_paths.find(alt_path_name); if (ref_path_iter != alt_paths.end() && ref_path_iter->second.mapping_size() != 0) { // We have the ref path so we can just look at its first node auto first_ref_node = ref_path_iter->second.mapping(0).position().node_id(); // Find the first place it starts in the ref path first_ref_base = path_index.by_id.at(first_ref_node).first; } else if (alt_path_iter != alt_paths.end() && alt_path_iter->second.mapping_size() != 0) { // We have an alt path, so we can look at // the ref node before it and go one after // its end // Find the first node in the alt auto first_alt_id = alt_path_iter->second.mapping(0).position().node_id(); bool first_alt_orientation = alt_path_iter->second.mapping(0).position().is_reverse(); // Get all the edges coming in to it auto left_edges = (first_alt_orientation ? index.edges_on_end(first_alt_id) : index.edges_on_start(first_alt_id)); // We need to fill in the ref to past the // end of the latest reference node that can // come before this alt. first_ref_base = 0; for (auto& edge : left_edges) { // For every edge, see what other node it attaches to auto other_id = (edge.from() == first_alt_id ? edge.to() : edge.from()); if (other_id == first_alt_id) { // Skip self loops continue; } if (path_index.by_id.count(other_id) == 0) { // Skip nodes that aren't in the reference path continue; } // Look up where the node starts in the reference auto start = path_index.by_id.at(other_id).first; // There plus the length of the node will be the first ref base in our site first_ref_base = max(first_ref_base, start + index.node_length(other_id)); // TODO: handling of cases where the alt // connects to multiple reference nodes // in different orientations. } } else { // We lack both the ref and the alt path. // This site must have been skipped during // construction. cerr << "warning:[vg index] Alt and ref paths for " << var_name << " at " << variant.sequenceName << ":" << variant.position << " missing/empty! Was variant skipped during construction?" << endl; continue; } // Now we know the first ref base in our ref // allele. What's the past-the-end base after we // go through our ref allele. size_t last_ref_base = first_ref_base; if (ref_path_iter != alt_paths.end()) { for (size_t i = 0; i < ref_path_iter->second.mapping_size(); i++) { // Scoot it along with the length of // every node on our reference allele // path. last_ref_base += index.node_length( ref_path_iter->second.mapping(i).position().node_id()); } } if ((nonvariant_starts[sample_number * 2 + phase_offset] <= first_ref_base) || !discard_overlaps) { // We need reference mappings from the last // variant up until the first actually // variable ref base in this site append_reference_mappings_until(sample_number * 2 + phase_offset, first_ref_base); for (size_t i = 0; (alt_path_iter != alt_paths.end() && i < alt_path_iter->second.mapping_size()); i++) { // Then blit mappings from the alt over to the phase thread append_mapping(sample_number * 2 + phase_offset, mapping_to_thread_mapping(alt_path_iter->second.mapping(i))); } // Say we've accounted for the reference on // this path through the end of the variable // region, which we have. nonvariant_starts[sample_number * 2 + phase_offset] = last_ref_base; } } } // Now we have processed both phasings for this sample. } }; // Look for variants only on this path variant_file.setRegion(vcf_contig_name); // Set up progress bar ProgressBar* progress = nullptr; // Message needs to last as long as the bar itself. string progress_message = "loading variants for " + vcf_contig_name; if (show_progress) { progress = new ProgressBar(path_length, progress_message.c_str()); progress->Progressed(0); } // Allocate a place to store actual variants vcflib::Variant var(variant_file); // How many variants have we done? size_t variants_processed = 0; while (variant_file.is_open() && variant_file.getNextVariant(var) && var.sequenceName == vcf_contig_name) { // this ... maybe we should remove it as for when we have calls against N bool isDNA = allATGC(var.ref); for (vector<string>::iterator a = var.alt.begin(); a != var.alt.end(); ++a) { if (!allATGC(*a)) isDNA = false; } // only work with DNA sequences if (!isDNA) { continue; } var.position -= 1; // convert to 0-based // Handle the variant handle_variant(var); if (variants_processed++ % 1000 == 0 && progress != nullptr) { // Say we made progress progress->Progressed(var.position); } } if (variants_processed > 0) { // There were actually some variants on this path. We only // want to actually have samples traverse the path if there // were variants on it. // Now finish up all the threads for (size_t i = 0; i < num_phases; i++) { // Each thread runs out until the end of the reference path append_reference_mappings_until(i, path_length); // And then we save all the threads finish_phase(i); } } if (progress != nullptr) { // Throw out our progress bar delete progress; cerr << endl; if (show_progress) { cerr << "Processed " << variants_processed << " variants" << endl; } } } if (show_progress) { cerr << "Inserting all phase threads into DAG..." << endl; } // Now insert all the threads in a batch into the known-DAG VCF- // derived graph. index.insert_threads_into_dag(all_phase_threads); all_phase_threads.clear(); } if (show_progress) { cerr << "Saving index to disk..." << endl; } // save the xg version to the file name we've been given ofstream db_out(xg_name); index.serialize(db_out); db_out.close(); } if (!gcsa_name.empty()) { // We need to make a gcsa index. // Configure GCSA2 verbosity so it doesn't spit out loads of extra info if (!show_progress) gcsa::Verbosity::set(gcsa::Verbosity::SILENT); // Load up the graphs vector<string> tmpfiles; if (dbg_names.empty()) { VGset graphs(file_names); graphs.show_progress = show_progress; // Go get the kmers of the correct size tmpfiles = graphs.write_gcsa_kmers_binary(kmer_size, path_only, forward_only); } else { tmpfiles = dbg_names; } // Make the index with the kmers gcsa::InputGraph input_graph(tmpfiles, true); gcsa::ConstructionParameters params; params.setSteps(doubling_steps); params.setLimit(size_limit); // build the GCSA index gcsa::GCSA* gcsa_index = new gcsa::GCSA(input_graph, params); // build the LCP array string lcp_name = gcsa_name + ".lcp"; gcsa::LCPArray* lcp_array = new gcsa::LCPArray(input_graph, params); if (verify_index) { //cerr << "verifying index" << endl; if (!gcsa::verifyIndex(*gcsa_index, lcp_array, input_graph)) { cerr << "[vg::main]: GCSA2 index verification failed" << endl; } } // clean up input graph temp files if (dbg_names.empty()) { for (auto& tfn : tmpfiles) { remove(tfn.c_str()); } } // Save the GCSA2 index sdsl::store_to_file(*gcsa_index, gcsa_name); delete gcsa_index; // Save the LCP array sdsl::store_to_file(*lcp_array, lcp_name); delete lcp_array; } if (!rocksdb_name.empty()) { Index index; if (compact) { index.open_for_write(rocksdb_name); index.compact(); index.flush(); index.close(); } // todo, switch to xg for graph storage // index should write and load index/xg or such // then a handful of functions used in main.cpp and mapper.cpp need to be rewritten to use the xg index if (store_graph && file_names.size() > 0) { index.open_for_write(rocksdb_name); VGset graphs(file_names); graphs.show_progress = show_progress; graphs.store_in_index(index); //index.flush(); //index.close(); // reopen to index paths // this requires the index to be queryable //index.open_for_write(db_name); graphs.store_paths_in_index(index); index.compact(); index.flush(); index.close(); } if (store_node_alignments && file_names.size() > 0) { index.open_for_bulk_load(rocksdb_name); int64_t aln_idx = 0; function<void(Alignment&)> lambda = [&index,&aln_idx](Alignment& aln) { index.cross_alignment(aln_idx++, aln); }; for (auto& file_name : file_names) { get_input_file(file_name, [&](istream& in) { stream::for_each_parallel(in, lambda); }); } index.flush(); index.close(); } if (store_alignments && file_names.size() > 0) { index.open_for_bulk_load(rocksdb_name); function<void(Alignment&)> lambda = [&index](Alignment& aln) { index.put_alignment(aln); }; for (auto& file_name : file_names) { get_input_file(file_name, [&](istream& in) { stream::for_each_parallel(in, lambda); }); } index.flush(); index.close(); } if (dump_alignments) { vector<Alignment> output_buf; index.open_read_only(rocksdb_name); auto lambda = [&output_buf](const Alignment& aln) { output_buf.push_back(aln); stream::write_buffered(cout, output_buf, 100); }; index.for_each_alignment(lambda); stream::write_buffered(cout, output_buf, 0); index.close(); } if (store_mappings && file_names.size() > 0) { index.open_for_bulk_load(rocksdb_name); function<void(Alignment&)> lambda = [&index](Alignment& aln) { const Path& path = aln.path(); for (int i = 0; i < path.mapping_size(); ++i) { index.put_mapping(path.mapping(i)); } }; for (auto& file_name : file_names) { get_input_file(file_name, [&](istream& in) { stream::for_each_parallel(in, lambda); }); } index.flush(); index.close(); } if (kmer_size != 0 && file_names.size() > 0) { index.open_for_bulk_load(rocksdb_name); VGset graphs(file_names); graphs.show_progress = show_progress; graphs.index_kmers(index, kmer_size, path_only, edge_max, kmer_stride, allow_negs); index.flush(); index.close(); // forces compaction index.open_for_write(rocksdb_name); index.flush(); index.compact(); index.close(); } if (prune_kb >= 0) { if (show_progress) { cerr << "pruning kmers > " << prune_kb << " on disk from " << rocksdb_name << endl; } index.open_for_write(rocksdb_name); index.prune_kmers(prune_kb); index.compact(); index.close(); } if (set_kmer_size) { assert(kmer_size != 0); index.open_for_write(rocksdb_name); index.remember_kmer_size(kmer_size); index.close(); } if (dump_index) { index.open_read_only(rocksdb_name); index.dump(cout); index.close(); } if (describe_index) { index.open_read_only(rocksdb_name); set<int> kmer_sizes = index.stored_kmer_sizes(); cout << "kmer sizes: "; for (auto kmer_size : kmer_sizes) { cout << kmer_size << " "; } cout << endl; index.close(); } if (path_layout) { index.open_read_only(rocksdb_name); //index.path_layout(); map<string, int64_t> path_by_id = index.paths_by_id(); map<string, pair<pair<int64_t, bool>, pair<int64_t, bool>>> layout; map<string, int64_t> length; index.path_layout(layout, length); for (auto& p : layout) { // Negate IDs for backward nodes cout << p.first << " " << p.second.first.first * (p.second.first.second ? -1 : 1) << " " << p.second.second.first * (p.second.second.second ? -1 : 1) << " " << length[p.first] << endl; } index.close(); } } return 0; }
double Simulation::RunBody(double maxSimTime) { //needed to control the execution time PART 1 //in the case you want to run in no faster than realtime //time_t starttime, endtime; //time(&starttime); //take the current time from the pedestrian double t = Pedestrian::GetGlobalTime(); //frame number. This function can be called many times, static int frameNr = (int) (1+t/_deltaT); // Frame Number //########## //PROBLEMATIC: time when frame should be printed out // possibly skipped when using the following lines // NEEDS TO BE FIXED! int writeInterval = (int) ((1./_fps)/_deltaT+0.5); writeInterval = (writeInterval<=0) ? 1 : writeInterval; // mustn't be <= 0 // ########## //process the queue for incoming pedestrians //important since the number of peds is used //to break the main simulation loop ProcessAgentsQueue(); _nPeds = _building->GetAllPedestrians().size(); std::cout << "\n"; std::string description = "Evacutation "; ProgressBar *bar = new ProgressBar(_nPeds, description); // bar->SetFrequencyUpdate(10); #ifdef _WINDOWS bar->SetStyle("|","-"); #else bar->SetStyle("\u2588", "-"); //for linux #endif int initialnPeds = _nPeds; // main program loop while ((_nPeds || (!_agentSrcManager.IsCompleted()&& _gotSources) ) && t<maxSimTime) { t = 0+(frameNr-1)*_deltaT; //process the queue for incoming pedestrians ProcessAgentsQueue(); if (t>Pedestrian::GetMinPremovementTime()) { //update the linked cells _building->UpdateGrid(); // update the positions _operationalModel->ComputeNextTimeStep(t, _deltaT, _building.get(), _periodic); //update the events _em->ProcessEvent(); //here we could place router-tasks (calc new maps) that can use multiple cores AND we have 't' //update quickestRouter if (_routingEngine.get()->GetRouter(ROUTING_FF_QUICKEST)) { FFRouter* ffrouter = dynamic_cast<FFRouter*>(_routingEngine.get()->GetRouter(ROUTING_FF_QUICKEST)); if (ffrouter->MustReInit()) { ffrouter->ReInit(); ffrouter->SetRecalc(t); } } //update the routes and locations UpdateRoutesAndLocations(); //other updates //someone might have left the building _nPeds = _building->GetAllPedestrians().size(); } // update the global time Pedestrian::SetGlobalTime(t); // write the trajectories if (0==frameNr%writeInterval) { _iod->WriteFrame(frameNr/writeInterval, _building.get()); } if(!_gotSources && !_periodic && _config->print_prog_bar()) // Log->ProgressBar(initialnPeds, initialnPeds-_nPeds, t); bar->Progressed(initialnPeds-_nPeds); else if ((!_gotSources) && ((frameNr < 100 && frameNr % 10 == 0) || (frameNr > 100 && frameNr % 100 == 0))) printf("time: %6.2f (%4.0f) | Agents: %6ld / %d [%4.1f%%]\n", t , maxSimTime, _nPeds, initialnPeds, (double)(initialnPeds-_nPeds)/initialnPeds*100); // needed to control the execution time PART 2 // time(&endtime); // double timeToWait=t-difftime(endtime, starttime); // clock_t goal = timeToWait*1000 + clock(); // while (goal > clock()); ++frameNr; } return t; }