/// Concatenate two alignments into a single alignment.
///
/// The result's sequence is a1's sequence followed by a2's sequence, and its
/// path is whatever concat_paths() produces for the two input paths. No other
/// field of the result is populated here.
///
/// NOTE(review): an input without a path gets no special treatment in this
/// function; its (empty) path is simply handed to concat_paths().
///
/// @param a1    alignment supplying the leading sequence and path
/// @param a2    alignment supplying the trailing sequence and path
/// @param debug when true, the inputs and the result are dumped to stderr as JSON
/// @return the merged alignment
Alignment merge_alignments(const Alignment& a1, const Alignment& a2, bool debug) {
    if (debug) {
        cerr << "merging alignments " << endl
             << pb2json(a1) << endl
             << pb2json(a2) << endl;
    }

    // Glue the two reads and the two paths end to end.
    Alignment merged;
    merged.set_sequence(a1.sequence() + a2.sequence());
    *merged.mutable_path() = concat_paths(a1.path(), a2.path());

    if (debug) {
        cerr << "merged alignments, result is " << endl
             << pb2json(merged) << endl;
    }

    return merged;
}
void PathIndex::apply_translations(const vector<Translation>& translations) { // Convert from normal to partitioning translations // For each original node ID, we keep a vector of pairs of from mapping and // to mapping. We only keep pairs where the from mapping isn't empty. map<id_t, vector<pair<Mapping, Mapping>>> collated; for (auto& t : translations) { if (t.from().mapping_size() < 1 || t.to().mapping_size() != 1) { // Ensure the translations are the format we expect. They always have // at least one from mapping (but maybe an insert too) and exactly 1 // to mapping. cerr << "error:[vg::PathIndex] Bad translation: " << pb2json(t) << endl; throw runtime_error("Translation not in VG::edit() format"); } if (mapping_from_length(t.from().mapping(0)) == 0) { // This is a novel node and can't be on our path continue; } if (t.from().mapping(0).position().is_reverse()) { // Wait for the forward-orientation version continue; } // Stick the from and to mappings in the list for the from node collated[t.from().mapping(0).position().node_id()].push_back(make_pair(t.from().mapping(0), t.to().mapping(0))); } for (auto& kv : collated) { // For every original node and its replacement nodes // Sort the replacement mappings std::sort(kv.second.begin(), kv.second.end(), [](const pair<Mapping, Mapping>& a, const pair<Mapping, Mapping>& b) { // Return true if the a pair belongs before the b pair along the path through the original node return a.first.position().offset() <= b.first.position().offset(); }); // Make a new translation to cover the original node Translation covering; for (auto mapping_pair : kv.second) { // Split across these parts of new nodes *(covering.mutable_to()->add_mapping()) = mapping_pair.second; } // Just assume we take up the whole original node auto* from_mapping = covering.mutable_from()->add_mapping(); from_mapping->mutable_position()->set_node_id(kv.first); // Give it a full length perfect match auto* from_edit = from_mapping->add_edit(); 
from_edit->set_from_length(path_from_length(covering.to())); from_edit->set_to_length(from_edit->from_length()); // Apply this (single node) translation. // TODO: batch up a bit? apply_translation(covering); } }
/// Write the call graph to a stream.
///
/// @param out  stream to write to
/// @param json when true, emit pb2json() of the underlying graph; otherwise
///             use the call graph's own serialize_to_ostream()
void Caller::write_call_graph(ostream& out, bool json) {
    if (!json) {
        _call_graph.serialize_to_ostream(out);
        return;
    }
    out << pb2json(_call_graph.graph);
}
/// Pull objects from the reader returned by get_read_fn() until it reports
/// failure, writing them to out in batches.
///
/// Objects are buffered and flushed whenever the buffer holds at least
/// buf_size objects, and once more when the reader stops. In JSON mode each
/// object is written with pb2json(); otherwise batches go through
/// stream::write() and stream::finish() is called at the end.
///
/// @param out      destination stream (flushed before returning)
/// @param json_out true to write JSON, false to use the stream:: writers
/// @param buf_size number of buffered objects that triggers a flush
/// @return total number of objects written
inline int64_t JSONStreamHelper<T>::write(std::ostream& out, bool json_out, int64_t buf_size) {
    std::function<bool(T&)> next_object = get_read_fn();
    std::vector<T> pending;
    int64_t written = 0;

    // Accessor handed to stream::write so it can fetch the i-th buffered object.
    std::function<T(size_t)> nth_pending = [&](size_t idx) -> T { return pending[idx]; };

    // Emit everything currently buffered, count it, and empty the buffer.
    auto flush_pending = [&]() {
        if (json_out) {
            for (auto& item : pending) {
                out << pb2json(item);
            }
        } else {
            stream::write(out, pending.size(), nth_pending);
        }
        written += pending.size();
        pending.clear();
    };

    for (;;) {
        T obj;
        const bool got_one = next_object(obj);
        if (got_one) {
            pending.push_back(obj);
        }
        // Flush on a full buffer, and always on the final (failed) read.
        if (!got_one || pending.size() >= buf_size) {
            flush_pending();
        }
        if (!got_one) {
            break;
        }
    }

    if (!json_out) {
        stream::finish(out);
    }

    out.flush();
    return written;
}
/// Return a copy of an alignment with the first `drop` bases removed.
///
/// With drop == 0 the input is returned unchanged. Otherwise only the name,
/// score, trimmed sequence and (if the input has one) trimmed path are
/// carried over to the result. The path is the second half of
/// cut_path(aln.path(), drop).
///
/// std::string::substr throws std::out_of_range if drop exceeds the
/// sequence length.
///
/// @param aln  alignment to trim
/// @param drop number of leading sequence bases to remove
/// @return the trimmed alignment
Alignment strip_from_start(const Alignment& aln, size_t drop) {
    if (drop == 0) {
        return aln;
    }

    Alignment trimmed;
    trimmed.set_name(aln.name());
    trimmed.set_score(aln.score());
    trimmed.set_sequence(aln.sequence().substr(drop));

    if (aln.has_path()) {
        // Keep the part of the path after the cut point.
        *trimmed.mutable_path() = cut_path(aln.path(), drop).second;
        assert(trimmed.has_path());

        // The trimmed path must still account for every remaining base.
        const bool lengths_agree = alignment_to_length(trimmed) == trimmed.sequence().size();
        if (!lengths_agree) {
            cerr << "failed!!! drop from start 轰" << endl;
            cerr << pb2json(trimmed) << endl << endl;
            assert(false);
        }
    }

    return trimmed;
}
/// Return a copy of an alignment with the last `drop` bases removed.
///
/// With drop == 0 the input is returned unchanged. Otherwise only the name,
/// score, trimmed sequence and (if the input has one) trimmed path are
/// carried over to the result. The path is the first half of
/// cut_path(aln.path(), cut_at), where cut_at is the number of bases kept.
///
/// NOTE(review): cut_at is computed as an unsigned subtraction, so it wraps
/// around if drop exceeds the sequence length; callers presumably never pass
/// such a value — confirm.
///
/// @param aln  alignment to trim
/// @param drop number of trailing sequence bases to remove
/// @return the trimmed alignment
Alignment strip_from_end(const Alignment& aln, size_t drop) {
    if (drop == 0) {
        return aln;
    }

    Alignment trimmed;
    trimmed.set_name(aln.name());
    trimmed.set_score(aln.score());

    // Number of leading bases that survive the trim.
    const size_t cut_at = aln.sequence().size() - drop;
    trimmed.set_sequence(aln.sequence().substr(0, cut_at));

    if (aln.has_path()) {
        // Keep the part of the path before the cut point.
        *trimmed.mutable_path() = cut_path(aln.path(), cut_at).first;
        assert(trimmed.has_path());

        // The trimmed path must still account for every remaining base.
        const bool lengths_agree = alignment_to_length(trimmed) == trimmed.sequence().size();
        if (!lengths_agree) {
            cerr << "failed!!! drop from end 轰" << endl;
            cerr << pb2json(trimmed) << endl << endl;
            assert(false);
        }
    }

    return trimmed;
}
/**
 * Create a VG graph from a pinch thread set.
 *
 * Works in two passes over every segment in the thread set. The first pass
 * creates one vg node per "leader" segment (the first segment of a block, or
 * the segment itself when it has no block). The second pass adds an edge
 * between each segment's node and the nodes of its 5' and 3' neighbors.
 *
 * @param threadSet       the pinch thread set to convert
 * @param threadSequences sequence for each relevant thread, keyed by thread
 *                        name; block segments whose thread is absent from
 *                        this map are skipped when picking a node sequence
 * @return the constructed graph
 */
vg::VG pinchToVG(stPinchThreadSet* threadSet, std::map<int64_t, std::string>& threadSequences) {
    // Start from an empty graph
    vg::VG graph;

    // Which node stands for which leader segment. Only leaders get nodes:
    // the first segment of each block, and every segment that has no block.
    std::map<stPinchSegment*, vg::Node*> nodeForLeader;

    std::cerr << "Making pinch graph into vg graph with " << threadSequences.size() << " relevant threads" << std::endl;

    // Clip out the part of its thread's sequence that a segment covers.
    auto clipSequence = [&threadSequences](stPinchSegment* seg) {
        return threadSequences.at(stPinchSegment_getName(seg)).substr(
            stPinchSegment_getStart(seg),
            stPinchSegment_getLength(seg));
    };

    // Add an edge between two nodes and (in debug builds) report it.
    // vg::VG deduplicates edges for us.
    auto connect = [&graph](vg::Node* fromNode, bool fromStart, vg::Node* toNode, bool toEnd) {
        vg::Edge edge;
        edge.set_from(fromNode->id());
        edge.set_from_start(fromStart);
        edge.set_to(toNode->id());
        edge.set_to_end(toEnd);

        graph.add_edge(edge);

#ifdef debug
        std::cerr << "Made edge: " << pb2json(edge) << std::endl;
#endif
    };

    // Pass 1: make a node for every leader.
    auto segmentIterator = stPinchThreadSet_getSegmentIt(threadSet);
    while(auto segment = stPinchThreadSetSegmentIt_getNext(&segmentIterator)) {

#ifdef debug
        std::cerr << "Found segment " << segment << std::endl;
#endif

        // The block this segment belongs to, if any
        auto block = stPinchSegment_getBlock(segment);

        // First segment in the block, or the segment itself if it has no block
        auto leader = getLeader(segment);

        if(nodeForLeader.count(leader) != 0) {
            // This block already has its node.
            continue;
        }

        std::string sequence;

        if(!block) {
            // A lone segment supplies its own sequence. No flip is needed,
            // since it can't be backwards in a block.
            sequence = clipSequence(segment);
        } else {
            // Scan the block for the first sequence that isn't all Ns, if any.
            auto blockIterator = stPinchBlock_getSegmentIterator(block);
            while(auto candidate = stPinchBlockIt_getNext(&blockIterator)) {
                if(threadSequences.count(stPinchSegment_getName(candidate)) == 0) {
                    // This segment is part of a staple. Pass it up
                    continue;
                }

                sequence = clipSequence(candidate);

                if(getOrientation(candidate)) {
                    // Bring the segment into the block's orientation
                    sequence = vg::reverse_complement(sequence);
                }

                const auto nCount = std::count(sequence.begin(), sequence.end(), 'N') +
                    std::count(sequence.begin(), sequence.end(), 'n');
                if(nCount < sequence.size()) {
                    // At least one non-N base: good enough, stop looking.
                    break;
                }

                // All Ns: try the next segment
            }
        }

        // The node that represents this block (or lone segment)
        vg::Node* node = graph.create_node(sequence);
        nodeForLeader[leader] = node;

#ifdef debug
        std::cerr << "Made node: " << pb2json(*node) << std::endl;
#endif
    }

    // Pass 2: visit every segment again and wire the nodes together.
    segmentIterator = stPinchThreadSet_getSegmentIt(threadSet);
    while(auto segment = stPinchThreadSetSegmentIt_getNext(&segmentIterator)) {
        // The block this segment belongs to, if any (not otherwise used below)
        auto block = stPinchSegment_getBlock(segment);

        // First segment in the block, or the segment itself if it has no block
        auto leader = getLeader(segment);

        // Pass 1 guarantees this lookup succeeds
        auto node = nodeForLeader.at(leader);

        // Orientation of this node for the purposes of its edges.
        // TODO: ought to always be false if the segment isn't in a block. Is this true?
        auto orientation = getOrientation(segment);

#ifdef debug
        std::cerr << "Revisited segment: " << segment << " for node " << node->id() <<
            " in orientation " << (orientation ? "reverse" : "forward") << std::endl;
#endif

        // The segment 5' of here. We know it's not a staple and thus has a
        // vg node.
        auto prevSegment = stPinchSegment_get5Prime(segment);
        if(prevSegment) {
            auto prevNode = nodeForLeader.at(getLeader(prevSegment));
            auto prevOrientation = getOrientation(prevSegment);

#ifdef debug
            std::cerr << "Found prev node " << prevNode->id() << " in orientation " <<
                (prevOrientation ? "reverse" : "forward") << std::endl;
#endif

            connect(prevNode, prevOrientation, node, orientation);
        }

        // And the same for the segment on the 3' side
        auto nextSegment = stPinchSegment_get3Prime(segment);
        if(nextSegment) {
            auto nextNode = nodeForLeader.at(getLeader(nextSegment));
            auto nextOrientation = getOrientation(nextSegment);

#ifdef debug
            std::cerr << "Found next node " << nextNode->id() << " in orientation " <<
                (nextOrientation ? "reverse" : "forward") << std::endl;
#endif

            connect(node, orientation, nextNode, nextOrientation);
        }
    }

    // Hand back the finished graph.
    return graph;
}
/// Partition a translation's to-path among the nodes of its from-path.
///
/// Precondition (not checked here): the translation replaces a set of old
/// nodes each with a nonempty run of new mappings, so new mappings never
/// have to be combined or split across old nodes.
///
/// The from mappings are walked in order; for each one, to mappings are
/// consumed (always at least one) until the consumed from-length catches up
/// with the from-length of the old mappings seen so far. When the old
/// mapping is on the reverse strand, each copied new mapping has its strand
/// flipped and the list for that node is reversed.
///
/// NOTE(review): if one node ID occurs in several from mappings, its pieces
/// accumulate in a single list, and a reverse-strand visit reverses that
/// whole list — confirm this cannot happen under the precondition.
///
/// @param translation the translation to partition
/// @return for each old node ID, the new mappings that replace it
map<id_t, vector<Mapping>> PathIndex::parse_translation(const Translation& translation) {

#ifdef debug
    cerr << "Partitioning translation: " << pb2json(translation) << endl;
#endif

    // Filled in with the mappings that partition each old node.
    map<id_t, vector<Mapping>> partitions;

    const auto& old_path = translation.from();
    const auto& new_path = translation.to();

    // Bases accounted for so far along the old and new paths.
    size_t old_total = 0;
    size_t new_total = 0;

    // Position of the next unconsumed mapping in the new path. New mappings
    // are conceptually nested inside old ones, so a single forward cursor
    // shared across the outer loop is enough.
    size_t next_new = 0;

    for (size_t old_index = 0; old_index < old_path.mapping_size(); old_index++) {
        auto& old_mapping = old_path.mapping(old_index);
        const bool backward = old_mapping.position().is_reverse();

        old_total += mapping_from_length(old_mapping);

        // The list of pieces replacing this old node
        auto& pieces = partitions[old_mapping.position().node_id()];

        // Every old mapping owns at least one new mapping, hence do/while.
        do {
            // Work on a copy so the strand can be adjusted
            auto piece = new_path.mapping(next_new);

            if (backward) {
                // Express the piece relative to the old node's forward strand
                piece.mutable_position()->set_is_reverse(!piece.position().is_reverse());
            }

            new_total += mapping_from_length(piece);
            pieces.push_back(piece);

            next_new++;
        } while (next_new < new_path.mapping_size() && new_total < old_total);

        if (backward) {
            // Pieces were collected in reverse-strand order; turn them around
            reverse(pieces.begin(), pieces.end());
        }

#ifdef debug
        cerr << "Old node " << old_mapping.position().node_id() << " "
            << backward << " becomes: " << endl;
        for(auto& m : pieces) {
            cerr << "\t" << pb2json(m) << endl;
        }
#endif
    }

    return partitions;
}