/**
 * Paired-read orientation filter.
 *
 * A read counts as "reversed" when at least one of its mappings lies on the
 * reverse strand. When the two reads' reversed flags differ, the pair is
 * returned only if `inverse` is set; when they agree, the pair is returned
 * only if `inverse` is unset. In every other case a pair of empty Alignments
 * is returned.
 *
 * NOTE(review): the single-read overload of this filter tests "both reads
 * reversed" instead of "orientations differ" -- confirm which is intended.
 *
 * @param aln_first  first read of the pair
 * @param aln_second second read of the pair
 * @return copies of both reads, or two default-constructed Alignments
 */
pair<Alignment, Alignment> Filter::orientation_filter(Alignment& aln_first, Alignment& aln_second){
    bool first_reversed = false;
    for (const auto& m : aln_first.path().mapping()){
        if (m.position().is_reverse()){
            first_reversed = true;
        }
    }

    bool second_reversed = false;
    for (const auto& m : aln_second.path().mapping()){
        if (m.position().is_reverse()){
            second_reversed = true;
        }
    }

    const bool orientations_differ = (first_reversed != second_reversed);

    // Decide whether the pair survives, then build the result once.
    bool keep_pair;
    if (orientations_differ){
        keep_pair = inverse ? true : false;
    }
    else{
        keep_pair = inverse ? false : true;
    }

    if (keep_pair){
        return std::make_pair(aln_first, aln_second);
    }
    return std::make_pair(Alignment(), Alignment());
}
/**
 * Soft-clip filter.
 *
 * Measures the overhang at each end of the read as (to_length - from_length)
 * of the first edit of the first mapping and of the last edit of the last
 * mapping. If either overhang exceeds `soft_clip_limit` the read is returned
 * (or an empty Alignment when `inverse` is set); otherwise an empty Alignment
 * is returned (or the read when `inverse` is set).
 *
 * A terminal mapping that carries no edits contributes an overhang of 0
 * instead of indexing into an empty edit list.
 *
 * Reads with no mappings at all are returned unchanged (empty when `inverse`
 * is set); a warning is printed to stderr when such a read is not longer than
 * `soft_clip_limit`.
 *
 * @param aln the read to test
 * @return a copy of `aln` or a default-constructed Alignment
 */
Alignment Filter::soft_clip_filter(Alignment& aln){
    const Path& path = aln.path();

    if (path.mapping_size() > 0){
        const Mapping& first_mapping = path.mapping(0);
        const Mapping& last_mapping = path.mapping(path.mapping_size() - 1);

        // Guard against mappings without edits: edit(0) / edit(size - 1) on an
        // empty repeated field is an out-of-range access.
        int left_overhang = 0;
        if (first_mapping.edit_size() > 0){
            const Edit& left_edit = first_mapping.edit(0);
            left_overhang = left_edit.to_length() - left_edit.from_length();
        }

        int right_overhang = 0;
        if (last_mapping.edit_size() > 0){
            const Edit& right_edit = last_mapping.edit(last_mapping.edit_size() - 1);
            right_overhang = right_edit.to_length() - right_edit.from_length();
        }

        if (left_overhang > soft_clip_limit || right_overhang > soft_clip_limit){
            return inverse ? Alignment() : aln;
        }
        return inverse ? aln : Alignment();
    }

    // No mappings: the whole read is effectively unaligned.
    if (aln.sequence().length() > soft_clip_limit){
        return inverse ? Alignment() : aln;
    }

    cerr << "WARNING: SHORT ALIGNMENT: " << aln.sequence().size() << "bp" << endl
         << "WITH NO MAPPINGS TO REFERENCE" << endl
         << "CONSIDER REMOVING IT FROM ANALYSIS" << endl;
    return inverse ? Alignment() : aln;
}
/**
 * Produce a mutated copy of an alignment.
 *
 * When both error rates are zero the input is returned unchanged. Otherwise
 * every edit of every mapping is passed through mutate_edit() and the
 * resulting edits are appended, in order, to a fresh alignment that keeps the
 * original mapping positions. The result is simplified, its sequence is
 * re-derived with alignment_seq(), and the original name is copied over.
 *
 * @param aln         source alignment
 * @param base_error  error rate forwarded to mutate_edit()
 * @param indel_error error rate forwarded to mutate_edit()
 * @return the mutated alignment (or a copy of `aln` when no error is requested)
 */
Alignment Sampler::mutate(const Alignment& aln, double base_error, double indel_error) {
    const bool error_free = (base_error == 0 && indel_error == 0);
    if (error_free) {
        return aln;
    }

    string bases = "ATGC";
    uniform_real_distribution<double> rprob(0, 1);
    uniform_int_distribution<int> rbase(0, 3);

    Alignment mutaln;
    for (const auto& source_mapping : aln.path().mapping()) {
        // The path is only created on demand, exactly as mappings are added.
        Mapping* target_mapping = mutaln.mutable_path()->add_mapping();
        *target_mapping->mutable_position() = source_mapping.position();

        // Rewrite each edit of this mapping in its original order.
        for (const auto& source_edit : source_mapping.edit()) {
            auto replacement_edits = mutate_edit(source_edit,
                                                 make_pos_t(source_mapping.position()),
                                                 base_error, indel_error,
                                                 bases, rprob, rbase);
            for (auto& replacement : replacement_edits) {
                *target_mapping->add_edit() = replacement;
            }
        }
    }

    // Re-derive the alignment's sequence from the mutated path.
    mutaln = simplify(mutaln);
    mutaln.set_sequence(alignment_seq(mutaln));
    mutaln.set_name(aln.name());
    return mutaln;
}
/**
 * Add the evidence from one alignment to the pileups.
 *
 * If the alignment's first mapping is on the reverse strand, `alignment` is
 * first replaced (in place) by its reverse, using node lengths looked up in
 * `graph`. Then, for every mapping whose node exists in `graph`, each edit is
 * handed to compute_from_edit() together with the running node and read
 * offsets.
 *
 * The first mapping is only inspected when the path actually contains one, so
 * an alignment with an empty path no longer triggers an out-of-range access.
 *
 * @param graph     graph the alignment refers to
 * @param alignment alignment to pile up; may be overwritten with its reverse
 */
void Pileups::compute_from_alignment(VG& graph, Alignment& alignment) {
    // If we start reversed, flip the whole alignment to the forward strand.
    const bool starts_reversed = alignment.has_path()
        && alignment.path().mapping_size() > 0
        && alignment.path().mapping(0).position().is_reverse();
    if (starts_reversed) {
        alignment = reverse_alignment(alignment,
                                      (function<int64_t(int64_t)>) ([&graph](int64_t id) {
                                          return graph.get_node(id)->sequence().size();
                                      }));
    }

    const Path& path = alignment.path();
    int64_t read_offset = 0;
    for (int i = 0; i < path.mapping_size(); ++i) {
        const Mapping& mapping = path.mapping(i);
        if (graph.has_node(mapping.position().node_id())) {
            const Node* node = graph.get_node(mapping.position().node_id());
            NodePileup* pileup = get_create(node->id());
            int64_t node_offset = mapping.position().offset();
            for (int j = 0; j < mapping.edit_size(); ++j) {
                const Edit& edit = mapping.edit(j);
                // Process all pileups in the edit; the offsets are updated as we go.
                compute_from_edit(*pileup, node_offset, read_offset, *node,
                                  alignment, mapping, edit);
            }
        }
    }

    assert(alignment.sequence().empty() ||
           alignment.path().mapping_size() == 0 ||
           read_offset == alignment.sequence().length());
}
/**
 * Paired-read interchromosomal filter.
 *
 * Returns both reads when their path names differ, and a pair of empty
 * Alignments when the names are equal.
 *
 * NOTE(review): this overload does not consult `inverse` -- confirm whether
 * that is intentional.
 *
 * @param aln_first  first read of the pair
 * @param aln_second second read of the pair
 * @return copies of both reads, or two default-constructed Alignments
 */
pair<Alignment, Alignment> Filter::interchromosomal_filter(Alignment& aln_first, Alignment& aln_second){
    const bool same_path_name = (aln_first.path().name() == aln_second.path().name());
    if (same_path_name){
        return std::make_pair(Alignment(), Alignment());
    }
    return std::make_pair(aln_first, aln_second);
}
/**
 * Concatenate two alignments end-to-end.
 *
 * The result's sequence is a1's sequence followed by a2's, and its path is
 * concat_paths(a1.path(), a2.path()). No other fields are copied.
 *
 * @param a1    left alignment
 * @param a2    right alignment
 * @param debug when true, dump the inputs and the result as JSON to stderr
 * @return the concatenated alignment
 */
Alignment merge_alignments(const Alignment& a1, const Alignment& a2, bool debug) {
    if (debug) {
        cerr << "merging alignments " << endl
             << pb2json(a1) << endl
             << pb2json(a2) << endl;
    }

    // Build the concatenation: path first, then the read sequence.
    Alignment merged;
    *merged.mutable_path() = concat_paths(a1.path(), a2.path());
    merged.set_sequence(a1.sequence() + a2.sequence());

    if (debug) {
        cerr << "merged alignments, result is " << endl
             << pb2json(merged) << endl;
    }
    return merged;
}
/**
 * Recover the sequence spelled out by an alignment's path.
 *
 * Collects, for every mapping that has a position with a non-zero node id,
 * the neighborhood of that node (requested with the argument 2) from the xg
 * index into one subgraph, loads it into a VG and asks it for the path string.
 *
 * @param aln alignment whose path should be rendered
 * @return result of VG::path_string() for the alignment's path
 */
string Sampler::alignment_seq(const Alignment& aln) {
    // Gather the part of the graph touched by the alignment path.
    Graph sub;
    for (const auto& m : aln.path().mapping()) {
        const bool usable = m.has_position() && m.position().node_id();
        if (!usable) {
            continue;
        }
        auto id = m.position().node_id();
        xgidx->neighborhood(id, 2, sub);
    }

    VG g;
    g.extend(sub);
    return g.path_string(aln.path());
}
/**
 * Percent-identity filter.
 *
 * Identity is computed as (bases in exact-match edits) / (read length), where
 * an exact-match edit has from_length == to_length and an empty sequence.
 * E.g. if a read matches the reference along 80% of its length and the cutoff
 * (`min_percent_identity`) is 0.9, the read is below the cutoff.
 *
 * A read below the cutoff yields an empty Alignment (or the read itself when
 * `inverse` is set); any other read is returned (or an empty Alignment when
 * `inverse` is set).
 *
 * A read with an empty sequence is never considered below the cutoff. This is
 * the same outcome the previous 0/0 division produced implicitly through
 * NaN/inf comparison; it is now explicit.
 *
 * @param aln the read to test
 * @return a copy of `aln` or a default-constructed Alignment
 */
Alignment Filter::percent_identity_filter(Alignment& aln){
    const int64_t aln_total_len = aln.sequence().size();
    int64_t aln_match_len = 0;

    //TODO handle reversing mappings
    for (const auto& mapping : aln.path().mapping()){
        for (const auto& edit : mapping.edit()){
            const bool exact_match = edit.from_length() == edit.to_length()
                                     && edit.sequence().empty();
            if (exact_match){
                aln_match_len += edit.to_length();
            }
        }
    }

    bool below_cutoff = false;
    if (aln_total_len > 0){
        const double pct_id = static_cast<double>(aln_match_len)
                              / static_cast<double>(aln_total_len);
        below_cutoff = pct_id < min_percent_identity;
    }

    if (below_cutoff){
        return inverse ? aln : Alignment();
    }
    return inverse ? Alignment() : aln;
}
int alignment_from_length(const Alignment& a) { int l = 0; for (const auto& m : a.path().mapping()) { l += from_length(m); } return l; }
/**
 * Sample an alignment of `length` bases with simulated errors.
 *
 * With no error requested this is just alignment(length). Otherwise a
 * longer-than-necessary perfect alignment is sampled, mutated, and trimmed
 * from the end to `length`; this is retried up to 100 times while the mutated
 * read comes out too short. If no attempt reaches the requested length a
 * warning is printed and the last (short) attempt is returned.
 *
 * Success is tracked with an explicit flag. The previous `iter == maxiter`
 * test could never be true after the loop ran out (the post-increment left
 * iter at maxiter + 1) and was falsely true when the final attempt succeeded.
 *
 * @param length      desired read length
 * @param base_error  error rate forwarded to mutate()
 * @param indel_error error rate forwarded to mutate(); also sizes the padding
 * @return the sampled alignment with its identity field set
 */
Alignment Sampler::alignment_with_error(size_t length, double base_error, double indel_error) {
    const size_t maxiter = 100;
    Alignment aln;

    if (base_error > 0 || indel_error > 0) {
        // Sample a longer-than-necessary alignment, then trim.
        bool reached_length = false;
        for (size_t iter = 0; iter < maxiter && !reached_length; ++iter) {
            aln = mutate(alignment(length + 2 * ((double) length * indel_error)),
                         base_error, indel_error);
            if (aln.sequence().size() == length) {
                reached_length = true;
            } else if (aln.sequence().size() > length) {
                aln = strip_from_end(aln, aln.sequence().size() - length);
                reached_length = true;
            }
        }
        if (!reached_length) {
            cerr << "[vg::Sampler] Warning: could not generate alignment of sufficient length. "
                 << "Graph may be too small, or indel rate too high." << endl;
        }
    } else {
        aln = alignment(length);
    }

    aln.set_identity(identity(aln.path()));
    return aln;
}
/**
 * Render one alignment as a single SAM text record (terminated by '\n').
 *
 * Columns written, tab separated: QNAME, FLAG (from sam_flag()), RNAME,
 * POS (refpos + 1), MAPQ, CIGAR, RNEXT, PNEXT (matepos + 1), TLEN, SEQ, QUAL,
 * followed by an optional RG:Z: tag. Empty name / reference / sequence /
 * quality fields are written as "*", and the CIGAR is "*" unless the
 * alignment has a path with at least one mapping.
 *
 * RNEXT is "*" when no mate reference name is given, "=" when it equals
 * `refseq`, and the mate reference name otherwise. Previously an empty
 * `mateseq` produced an empty column (or "=" next to an RNAME of "*"), which
 * is not a valid SAM field.
 *
 * @param alignment read to render
 * @param refseq    reference sequence name (may be empty)
 * @param refpos    0-based reference position
 * @param cigar     precomputed CIGAR string
 * @param mateseq   mate's reference sequence name (may be empty)
 * @param matepos   0-based mate position
 * @param tlen      template length
 * @return the SAM line
 */
string alignment_to_sam(const Alignment& alignment,
                        const string& refseq,
                        const int32_t refpos,
                        const string& cigar,
                        const string& mateseq,
                        const int32_t matepos,
                        const int32_t tlen) {
    const bool has_mappings = alignment.has_path() && alignment.path().mapping_size();

    string rnext;
    if (mateseq.empty()) {
        rnext = "*";
    } else if (mateseq == refseq) {
        rnext = "=";
    } else {
        rnext = mateseq;
    }

    stringstream sam;
    sam << (!alignment.name().empty() ? alignment.name() : "*") << "\t"
        << sam_flag(alignment) << "\t"
        << (refseq.empty() ? "*" : refseq) << "\t"
        << refpos + 1 << "\t" // positions are 1-based in SAM, 0 means unmapped
        << alignment.mapping_quality() << "\t"
        << (has_mappings ? cigar : "*") << "\t"
        << rnext << "\t"
        << matepos + 1 << "\t"
        << tlen << "\t"
        << (!alignment.sequence().empty() ? alignment.sequence() : "*") << "\t";

    // Qualities are converted one value at a time to their printable form.
    if (!alignment.quality().empty()) {
        for (const char q : alignment.quality()) {
            sam << quality_short_to_char(q);
        }
    } else {
        sam << "*";
    }

    if (!alignment.read_group().empty()) {
        sam << "\tRG:Z:" << alignment.read_group();
    }
    sam << "\n";
    return sam.str();
}
/**
 * Single-read interchromosomal filter using the read's fragment_prev.
 *
 * When the read's path name differs from that of its fragment_prev, the read
 * is returned (or an empty Alignment when `inverse` is set). When the names
 * are equal, an empty Alignment is returned (or the read when `inverse` is
 * set).
 *
 * @param aln the read to test
 * @return a copy of `aln` or a default-constructed Alignment
 */
Alignment Filter::interchromosomal_filter(Alignment& aln){
    const bool same_path_name =
        (aln.path().name() == aln.fragment_prev().path().name());

    if (same_path_name){
        return inverse ? aln : Alignment();
    }
    return inverse ? Alignment() : aln;
}
/**
 * Paired-read insert-size filter.
 *
 * Asks the xg index for an approximate path distance on the first read's path
 * and returns both reads when that distance exceeds `my_max_distance`;
 * otherwise returns a pair of empty Alignments.
 *
 * NOTE(review): the two positions passed to approx_path_distance() are the
 * constants 1 and 1 (see TODO), and `inverse` is not consulted -- confirm.
 *
 * @param aln_first  first read of the pair
 * @param aln_second second read of the pair
 * @return copies of both reads, or two default-constructed Alignments
 */
pair<Alignment, Alignment> Filter::insert_size_filter(Alignment& aln_first, Alignment& aln_second){
    // TODO: get positions from aln_first and aln_second
    const int distance = my_xg_index->approx_path_distance(aln_first.path().name(), 1, 1);

    const bool within_limit = !(distance > my_max_distance);
    if (within_limit){
        return std::make_pair(Alignment(), Alignment());
    }
    return std::make_pair(aln_first, aln_second);
}
// act like the path this is against is the reference // and generate an equivalent cigar string cigar_against_path(const Alignment& alignment) { vector<pair<int, char> > cigar; if (!alignment.has_path()) return ""; const Path& path = alignment.path(); int l = 0; for (const auto& mapping : path.mapping()) { mapping_cigar(mapping, cigar); } return cigar_string(cigar); }
/* PE functions using fragment_prev and fragment_next */

/**
 * One-end-anchored filter.
 *
 * For a read whose fragment_prev has a name: if either the read's path name
 * or the fragment_prev's path name is empty, the read is returned (or an
 * empty Alignment when `inverse` is set); if both path names are present, an
 * empty Alignment is returned (or the read when `inverse` is set).
 * A read whose fragment_prev has no name yields the read when `inverse` is
 * set and an empty Alignment otherwise.
 *
 * The first two cases previously evaluated their result without returning
 * it, so control fell off the end of the function (undefined behaviour).
 *
 * @param aln the read to test
 * @return a copy of `aln` or a default-constructed Alignment
 */
Alignment Filter::one_end_anchored_filter(Alignment& aln){
    if (aln.fragment_prev().name() != ""){
        const bool one_end_unplaced =
            aln.path().name() == "" || aln.fragment_prev().path().name() == "";
        if (one_end_unplaced){
            return inverse ? Alignment() : aln;
        }
        return inverse ? aln : Alignment();
    }
    return inverse ? aln : Alignment();
}
/**
 * Encode an alignment as an integer vector over the xg entity space.
 *
 * The vector has node_count + edge_count entries, all initially 0. For each
 * mapping that has a position:
 *   - the node's slot (entity rank - 1) is set to 2 if the node lies on at
 *     least one path and to 1 otherwise;
 *   - if a forward-forward edge from the previous mapping's node exists, the
 *     edge's slot (entity rank - 1) is set to 1 if the edge lies on at least
 *     one path and to 2 otherwise.
 *
 * NOTE(review): the on-path / off-path codes are swapped between nodes and
 * edges -- confirm that this is intended.
 *
 * @param a alignment to encode
 * @return the encoded vector
 */
vector<int> Vectorizer::alignment_to_a_hot(Alignment a){
    const int64_t entity_size = my_xg->node_count + my_xg->edge_count;
    vector<int> ret(entity_size, 0);

    const Path& path = a.path();
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        if (!mapping.has_position()){
            continue;
        }

        const int64_t node_id = mapping.position().node_id();
        // Entity ranks are used 1-based here, hence the "- 1" when indexing.
        const int64_t key = my_xg->node_rank_as_entity(node_id);

        // Find the edge by current / previous node ID. Only the
        // forward-to-forward orientation is checked.
        if (i > 0){
            const int64_t prev_node_id = path.mapping(i - 1).position().node_id();
            if (my_xg->has_edge(prev_node_id, false, node_id, false)){
                const int64_t edge_key =
                    my_xg->edge_rank_as_entity(prev_node_id, false, node_id, false);
                const vector<size_t> edge_paths = my_xg->paths_of_entity(edge_key);
                ret[edge_key - 1] = edge_paths.empty() ? 2 : 1;
            }
        }

        // Check if the node of interest is on a path.
        const vector<size_t> node_paths = my_xg->paths_of_node(node_id);
        ret[key - 1] = node_paths.empty() ? 1 : 2;
    }
    return ret;
}
/**
 * Encode an alignment as a per-entity identity vector.
 *
 * The vector has node_count + edge_count entries, all initially 0.0. For each
 * mapping that has a position, the node's slot (entity rank - 1) receives the
 * fraction (bases in exact-match edits) / (sum of from_length over all
 * edits), or 0.0 when both are zero. If a forward-forward edge from the
 * previous mapping's node exists, the edge's slot (entity rank - 1) is set to
 * 1.0.
 *
 * @param a alignment to encode
 * @return the encoded vector
 */
vector<double> Vectorizer::alignment_to_identity_hot(Alignment a){
    const int64_t entity_size = my_xg->node_count + my_xg->edge_count;
    vector<double> ret(entity_size, 0.0);

    const Path& path = a.path();
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        if (!mapping.has_position()){
            continue;
        }

        const int64_t node_id = mapping.position().node_id();
        const int64_t key = my_xg->node_rank_as_entity(node_id);

        // Calculate % identity by walking the edits and counting matches.
        double match_len = 0.0;
        double total_len = 0.0;
        for (int j = 0; j < mapping.edit_size(); j++){
            const Edit& e = mapping.edit(j);
            total_len += e.from_length();
            const bool exact_match =
                e.from_length() == e.to_length() && e.sequence() == "";
            if (exact_match){
                match_len += (double) e.to_length();
            }
            // TODO if we map but don't match exactly, add half the average
            // length to match_length.
        }

        double pct_id = 0.0;
        if (!(match_len == 0.0 && total_len == 0.0)){
            pct_id = match_len / total_len;
        }
        ret[key - 1] = pct_id;

        // Mark the edge from the previous mapping's node, if there is one.
        if (i > 0){
            const int64_t prev_node_id = path.mapping(i - 1).position().node_id();
            if (my_xg->has_edge(prev_node_id, false, node_id, false)){
                const int64_t edge_key =
                    my_xg->edge_rank_as_entity(prev_node_id, false, node_id, false);
                ret[edge_key - 1] = 1.0;
            }
        }
    }
    return ret;
}
// generates a perfect alignment from the graph Alignment Sampler::alignment(size_t length) { string seq; Alignment aln; Path* path = aln.mutable_path(); pos_t pos = position(); char c = pos_char(pos); // we do something wildly inefficient but conceptually clean // for each position in the mapping we add a mapping // at the end we will simplify the alignment, merging redundant mappings do { // add in the char for the current position seq += c; Mapping* mapping = path->add_mapping(); *mapping->mutable_position() = make_position(pos); Edit* edit = mapping->add_edit(); edit->set_from_length(1); edit->set_to_length(1); // decide the next position auto nextc = next_pos_chars(pos); // no new positions mean we are done; we've reached the end of the graph if (nextc.empty()) break; // what positions do we go to next? vector<pos_t> nextp; for (auto& n : nextc) nextp.push_back(n.first); // pick one at random uniform_int_distribution<int> next_dist(0, nextc.size()-1); // update our position pos = nextp.at(next_dist(rng)); // update our char c = nextc[pos]; } while (seq.size() < length); // save our sequence in the alignment aln.set_sequence(seq); aln = simplify(aln); { // name the alignment string data; aln.SerializeToString(&data); int n; #pragma omp critical(nonce) n = nonce++; data += std::to_string(n); const string hash = sha1head(data, 16); aln.set_name(hash); } // and simplify it aln.set_identity(identity(aln.path())); return aln; }
/**
 * Looks for alignments that change direction over their length.
 * This may happen because of:
 * 1. Mapping artifacts
 * 2. Cycles
 * 3. Highly repetitive regions
 * 4. Inversions (if you're lucky enough)
 *
 * An alignment "reverses" when two consecutive mappings disagree on their
 * position's is_reverse flag.
 *
 * Default behavior: if the Alignment reverses, return an empty Alignment.
 * inverse behavior: if the Alignment reverses, return the Alignment.
 * A non-reversing Alignment is returned by default and dropped (empty
 * Alignment) in inverse mode.
 *
 * @param aln the read to test
 * @return a copy of `aln` or a default-constructed Alignment
 */
Alignment Filter::reversing_filter(Alignment& aln){
    const Path& path = aln.path();
    for (int i = 1; i < path.mapping_size(); i++){
        const bool prev_reverse = path.mapping(i - 1).position().is_reverse();
        const bool curr_reverse = path.mapping(i).position().is_reverse();
        if (prev_reverse != curr_reverse){
            return inverse ? aln : Alignment();
        }
    }
    return inverse ? Alignment() : aln;
}
/**
 * Remove the first `drop` bases from an alignment.
 *
 * Returns `aln` unchanged when `drop` is 0. Otherwise builds a new alignment
 * that keeps the name and score, the sequence without its first `drop`
 * characters and, if the input has a path, the second half of
 * cut_path(path, drop). The trimmed path's to-length must match the trimmed
 * sequence length; a mismatch is reported on stderr and asserted.
 *
 * A `drop` larger than the sequence is clamped to the sequence length, so the
 * call no longer throws std::out_of_range from substr().
 *
 * @param aln  alignment to trim
 * @param drop number of leading read bases to remove
 * @return the trimmed alignment
 */
Alignment strip_from_start(const Alignment& aln, size_t drop) {
    if (!drop) return aln;

    // Never ask for more bases than the read has.
    if (drop > aln.sequence().size()) {
        drop = aln.sequence().size();
    }

    Alignment res;
    res.set_name(aln.name());
    res.set_score(aln.score());
    res.set_sequence(aln.sequence().substr(drop));
    if (!aln.has_path()) return res;

    *res.mutable_path() = cut_path(aln.path(), drop).second;
    assert(res.has_path());
    if (alignment_to_length(res) != res.sequence().size()) {
        cerr << "failed!!! drop from start 轰" << endl;
        cerr << pb2json(res) << endl << endl;
        assert(false);
    }
    return res;
}
/**
 * Split reads map to two separate paths in the graph OR vastly separated
 * non-consecutive nodes in a single path.
 *
 * They're super important for detecting structural variants, so we may want to
 * filter them out or collect only split reads.
 *
 * Current heuristic: walk inwards from both ends of the path, comparing the
 * node ids of the outermost remaining pair of mappings; the read is
 * considered split as soon as one pair's ids differ by more than 10.
 *
 * Default behavior: a split read yields an empty Alignment, any other read is
 * returned. Inverse behavior: a split read is returned, any other read yields
 * an empty Alignment.
 *
 * @param aln the read to test
 * @return a copy of `aln` or a default-constructed Alignment
 */
Alignment Filter::split_read_filter(Alignment& aln){
    //TODO binary search for breakpoint in read would be awesome.
    //TODO check whether the two nodes are on the same path(s), using the XG.
    //TODO check if two mappings are far apart / a single mapping has a huge indel.
    const Path& path = aln.path();

    int top_side = path.mapping_size() - 1;
    int bottom_side = 0;
    while (top_side > bottom_side){
        const id_t top_id = path.mapping(top_side).position().node_id();
        const id_t bottom_id = path.mapping(bottom_side).position().node_id();

        // Absolute id difference, written out so the result does not depend
        // on which abs() overload happens to be visible for 64-bit ids.
        const id_t id_gap = (top_id > bottom_id) ? (top_id - bottom_id)
                                                 : (bottom_id - top_id);
        if (id_gap > 10){
            return inverse ? aln : Alignment();
        }

        top_side--;
        bottom_side++;
    }
    return inverse ? Alignment() : aln;
}
/**
 * Return the reverse of an alignment.
 *
 * The result is a copy of `aln` whose sequence is the reverse complement of
 * the original and, when the input has a path, whose path is
 * reverse_path(aln.path(), node_length). All other fields are copied as-is.
 *
 * @param aln         alignment to reverse
 * @param node_length callback giving the length of a node by id
 * @return the reversed alignment
 */
Alignment reverse_alignment(const Alignment& aln, const function<int64_t(int64_t)>& node_length) {
    // TODO: should we/can we do this in place?
    Alignment flipped = aln;
    flipped.set_sequence(reverse_complement(aln.sequence()));

    if (!aln.has_path()) {
        return flipped;
    }

    // reverse_path() is handed the node-length callback so it can rebuild the
    // path for the opposite strand.
    *flipped.mutable_path() = reverse_path(aln.path(), node_length);
    return flipped;
}
/**
 * Remove the last `drop` bases from an alignment.
 *
 * Returns `aln` unchanged when `drop` is 0. Otherwise builds a new alignment
 * that keeps the name and score, the sequence without its last `drop`
 * characters and, if the input has a path, the first half of
 * cut_path(path, kept_length). The trimmed path's to-length must match the
 * trimmed sequence length; a mismatch is reported on stderr and asserted.
 *
 * A `drop` larger than the sequence is clamped to the sequence length.
 * Previously `size() - drop` wrapped around to a huge value and the read came
 * back untrimmed.
 *
 * @param aln  alignment to trim
 * @param drop number of trailing read bases to remove
 * @return the trimmed alignment
 */
Alignment strip_from_end(const Alignment& aln, size_t drop) {
    if (!drop) return aln;

    // Never ask for more bases than the read has (avoids unsigned underflow).
    if (drop > aln.sequence().size()) {
        drop = aln.sequence().size();
    }
    const size_t cut_at = aln.sequence().size() - drop;

    Alignment res;
    res.set_name(aln.name());
    res.set_score(aln.score());
    res.set_sequence(aln.sequence().substr(0, cut_at));
    if (!aln.has_path()) return res;

    *res.mutable_path() = cut_path(aln.path(), cut_at).first;
    assert(res.has_path());
    if (alignment_to_length(res) != res.sequence().size()) {
        cerr << "failed!!! drop from end 轰" << endl;
        cerr << pb2json(res) << endl << endl;
        assert(false);
    }
    return res;
}
int32_t sam_flag(const Alignment& alignment) { int16_t flag = 0; if (alignment.score() == 0) { // unmapped flag |= BAM_FUNMAP; } else { // correctly aligned flag |= BAM_FPROPER_PAIR; } // HACKZ -- you can't determine orientation from a single part of the mapping // unless the graph is a DAG if (alignment.has_path() && alignment.path().mapping(0).position().is_reverse()) { flag |= BAM_FREVERSE; } if (alignment.is_secondary()) { flag |= BAM_FSECONDARY; } return flag; }
/**
 * Looks for alignments that transition from one path to another
 * over their length. This may occur for one of several reasons:
 * 1. The read covers a translocation
 * 2. The read looks a lot like two different (but highly-similar paths)
 * 3. The read is shattered (e.g. as in chromothripsis)
 *
 * An alignment is path divergent when, for some pair of consecutive mappings,
 * none of the paths through the previous node also contains the current node.
 *
 * Default behavior: if the Alignment is path divergent, return an empty Alignment, else return aln
 * Inverse behavior: if the Alignment is path divergent, return aln, else return an empty Alignment
 *
 * @param aln the read to test
 * @return a copy of `aln` or a default-constructed Alignment
 */
Alignment Filter::path_divergence_filter(Alignment& aln){
    const Path& path = aln.path();
    for (int i = 1; i < path.mapping_size(); i++){
        const id_t current_node = path.mapping(i).position().node_id();
        const id_t prev_node = path.mapping(i - 1).position().node_id();

        bool paths_match = false;
        const vector<size_t> paths_of_prev = my_xg_index->paths_of_node(prev_node);
        for (const size_t path_rank : paths_of_prev){
            const string p_name = my_xg_index->path_name(path_rank);
            if (my_xg_index->path_contains_node(p_name, current_node)){
                // One shared path is enough; the remaining index lookups are
                // skipped (assumes they are pure queries -- TODO confirm).
                paths_match = true;
                break;
            }
        }

        if (!paths_match){
            return inverse ? aln : Alignment();
        }
    }
    return inverse ? Alignment() : aln;
}
/**
 * Encode an alignment as a one-hot bit vector over the xg entity space.
 *
 * The vector has node_count + edge_count bits, all initially 0. For each
 * mapping that has a position, the node's bit (entity rank - 1) is set, and
 * if a forward-forward edge from the previous mapping's node exists, that
 * edge's bit (entity rank - 1) is set as well.
 *
 * Mappings without a position are skipped, as in the other vectorizers in
 * this file; previously such a mapping was looked up with a default node id.
 *
 * @param a alignment to encode
 * @return the encoded bit vector
 */
bit_vector Vectorizer::alignment_to_onehot(Alignment a){
    // Make a vector as large as the |nodes| + |edges| space.
    const int64_t entity_size = my_xg->node_count + my_xg->edge_count;
    bit_vector ret(entity_size, 0);

    const Path& path = a.path();
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        if (!mapping.has_position()){
            continue;
        }

        const int64_t node_id = mapping.position().node_id();
        // Entity ranks are used 1-based here, hence the "- 1" when indexing.
        const int64_t key = my_xg->node_rank_as_entity(node_id);

        // Find the edge by current / previous node ID. Only the
        // forward-to-forward orientation is checked.
        if (i > 0){
            const int64_t prev_node_id = path.mapping(i - 1).position().node_id();
            if (my_xg->has_edge(prev_node_id, false, node_id, false)){
                const int64_t edge_key =
                    my_xg->edge_rank_as_entity(prev_node_id, false, node_id, false);
                ret[edge_key - 1] = 1;
            }
        }

        ret[key - 1] = 1;
    }
    return ret;
}
/**
 * Single-read orientation filter using the read's fragment_prev.
 *
 * A read counts as "reversed" when at least one of its mappings lies on the
 * reverse strand. When both the read and its fragment_prev are reversed, the
 * read is returned (or an empty Alignment when `inverse` is set); otherwise
 * an empty Alignment is returned (or the read when `inverse` is set).
 *
 * NOTE(review): the paired overload of this filter tests "orientations
 * differ" instead of "both reversed" -- confirm which is intended.
 *
 * @param aln the read to test
 * @return a copy of `aln` or a default-constructed Alignment
 */
Alignment Filter::orientation_filter(Alignment& aln){
    bool f_rev = false;
    for (const auto& m : aln.path().mapping()){
        if (m.position().is_reverse()){
            f_rev = true;
            break; // one reverse mapping settles it
        }
    }

    bool s_rev = false;
    for (const auto& m : aln.fragment_prev().path().mapping()){
        if (m.position().is_reverse()){
            s_rev = true;
            break;
        }
    }

    // Logical (not bitwise) conjunction of the two flags.
    if (f_rev && s_rev){
        return inverse ? Alignment() : aln;
    }
    return inverse ? aln : Alignment();
}
/**
 * From-length of the first part of the alignment's path when the path is cut
 * at `pos` with cut_path().
 *
 * @param aln alignment whose path is cut
 * @param pos position at which to cut
 * @return path_from_length() of the first half of the cut
 */
size_t from_length_before_pos(const Alignment& aln, const Position& pos) {
    const auto halves = cut_path(aln.path(), pos);
    return path_from_length(halves.first);
}
/**
 * From-length of the second part of the alignment's path when the path is cut
 * at `pos` with cut_path().
 *
 * @param aln alignment whose path is cut
 * @param pos position at which to cut
 * @return path_from_length() of the second half of the cut
 */
size_t from_length_after_pos(const Alignment& aln, const Position& pos) {
    const auto halves = cut_path(aln.path(), pos);
    return path_from_length(halves.second);
}
// Merge that properly handles long indels.
// Assumes that alignments should line up end-to-end, in the order given.
//
// Returns a default-constructed Alignment for an empty input and a copy of
// the only element for a single-element input. Otherwise the inputs are
// reduced pairwise, generation by generation, with the two-argument
// merge_alignments() until one alignment remains, and that alignment's path
// is simplified before it is returned.
//
// alns:  alignments to join, in read order
// debug: forwarded to the pairwise merge_alignments()
Alignment merge_alignments(const vector<Alignment>& alns, bool debug) {
    if (alns.size() == 0) {
        Alignment aln;
        return aln;
    } else if (alns.size() == 1) {
        return alns.front();
    }
    // where possible get node and target lengths
    // to validate after merge
    // (disabled input validation, kept for reference)
    /*
    map<int64_t, map<size_t, set<const Alignment*> > > node_lengths;
    map<int64_t, map<size_t, set<const Alignment*> > > to_lengths;
    for (auto& aln : alns) {
        auto& path = aln.path();
        // find a mapping that overlaps the whole node
        // note that edits aren't simplified
        // so deletions are intact
        for (size_t i = 0; i < path.mapping_size(); ++i) {
            auto& m = path.mapping(i);
            if (m.position().offset() == 0) {
                // can we see if the next mapping is on the following node
                if (i < path.mapping_size()-1 && path.mapping(i+1).position().offset() == 0
                    && mapping_from_length(path.mapping(i+1)) && mapping_from_length(m)) {
                    // we cover the node, record the to_length and from_length
                    set<const Alignment*>& n = node_lengths[m.position().node_id()][from_length(m)];
                    n.insert(&aln);
                    set<const Alignment*>& t = to_lengths[m.position().node_id()][to_length(m)];
                    t.insert(&aln);
                }
            }
        }
    }
    // verify our input by checking for disagreements
    for (auto& n : node_lengths) {
        auto& node_id = n.first;
        if (n.second.size() > 1) {
            cerr << "disagreement in node lengths for " << node_id << endl;
            for (auto& l : n.second) {
                cerr << "alignments that report length of " << l.first << endl;
                for (auto& a : l.second) {
                    cerr << pb2json(*a) << endl;
                }
            }
        } else {
            //cerr << n.second.begin()->second.size() << " alignments support "
            //     << n.second.begin()->first << " as length for " << node_id << endl;
        }
    }
    */
    // parallel merge algorithm
    // for each generation
    // merge 0<-0+1, 1<-2+3, ...
    // until there is only one alignment
    // work on a private copy so the caller's vector is left untouched
    vector<Alignment> last = alns;
    // get the alignments ready for merge:
    // an alignment without a path gets one position-less mapping holding a
    // single edit that carries the whole read (to_length and sequence set,
    // from_length left unset)
#pragma omp parallel for
    for (size_t i = 0; i < last.size(); ++i) {
        Alignment& aln = last[i];
        //cerr << "on " << i << "th aln" << endl
        //     << pb2json(aln) << endl;
        if (!aln.has_path()) {
            Mapping m;
            Edit* e = m.add_edit();
            e->set_to_length(aln.sequence().size());
            e->set_sequence(aln.sequence());
            *aln.mutable_path()->add_mapping() = m;
        }
    }
    // each pass roughly halves the number of alignments
    while (last.size() > 1) {
        //cerr << "last size " << last.size() << endl;
        size_t new_count = last.size()/2;
        //cerr << "new count b4 " << new_count << endl;
        // round up so that an unpaired trailing alignment gets its own slot
        new_count += last.size() % 2; // force binary
        //cerr << "New count = " << new_count << endl;
        vector<Alignment> curr;
        curr.resize(new_count);
        // each iteration writes only curr[i] and reads only last[2i], last[2i+1]
#pragma omp parallel for
        for (size_t i = 0; i < curr.size(); ++i) {
            //cerr << "merging " << 2*i << " and " << 2*i+1 << endl;
            // take a pair from the old alignments
            // merge them into this one
            if (2*i+1 < last.size()) {
                auto& a1 = last[2*i];
                auto& a2 = last[2*i+1];
                curr[i] = merge_alignments(a1, a2, debug);
                // check that the merge did the right thing
                // (disabled output validation, kept for reference)
                /*
                auto& a3 = curr[i];
                for (size_t j = 0; j < a3.path().mapping_size()-1; ++j) {
                    // look up reported node length
                    // and compare to what we saw
                    // skips last mapping
                    auto& m = a3.path().mapping(j);
                    if (from_length(m) == to_length(m)
                        && m.has_position()
                        && m.position().offset()==0
                        && a3.path().mapping(j+1).has_position()
                        && a3.path().mapping(j+1).position().offset()==0) {
                        auto nl = node_lengths.find(m.position().node_id());
                        if (nl != node_lengths.end()) {
                            if (nl->second.find(from_length(m)) == nl->second.end()) {
                                cerr << "node length is not consistent for " << m.position().node_id() << endl;
                                cerr << "expected " << nl->second.begin()->first << endl;
                                cerr << "got " << from_length(m) << endl;
                                cerr << "inputs:" << endl << pb2json(a1) << endl << pb2json(a2)
                                     << endl << "output: " << endl << pb2json(a3) << endl;
                                //exit(1);
                            }
                        }
                    }
                }
                */
            } else {
                // odd one out: carry it into the next generation unchanged
                auto& a1 = last[2*i];
                //cerr << "no need to merge" << endl;
                curr[i] = a1;
            }
        }
        last = curr;
    }
    // collapse the concatenated path before handing back the result
    Alignment res = last.front();
    *res.mutable_path() = simplify(res.path());
    return res;
}