/**
 * Create a VG graph from a pinch thread set.
 *
 * The conversion runs in two passes over every segment of the thread set:
 *
 *  1. One vg node is created per "leader" segment (as reported by
 *     getLeader(): the first segment of a block, or the segment itself when
 *     it has no block). For a block, the node sequence is taken from the
 *     first member segment that has an entry in threadSequences and is not
 *     made entirely of N/n; if every candidate is all N, the last candidate
 *     examined is kept, and if no member has an entry the node sequence is
 *     left empty.
 *  2. Every segment is connected to its 5' and 3' neighbours (when present)
 *     with edges whose from_start / to_end flags come from getOrientation().
 *
 * @param threadSet       the pinch thread set to convert.
 * @param threadSequences sequences keyed by the value returned from
 *                        stPinchSegment_getName(). Block members whose key is
 *                        absent are skipped as staples. The substr() offsets
 *                        are the raw stPinchSegment_getStart() values, so this
 *                        presumably expects thread coordinates to start at 0
 *                        -- TODO confirm against the caller.
 * @return the populated graph.
 * @throws std::out_of_range if a segment without a block has no entry in
 *         threadSequences, or if a segment start lies past the end of its
 *         sequence (both propagate from the standard library).
 */
vg::VG pinchToVG(stPinchThreadSet* threadSet, std::map<int64_t, std::string>& threadSequences) {
    // Make an empty graph
    vg::VG graph;

    // Remember what nodes have been created for what segments. Only the first
    // segment in a block (the "leader") gets a node. Segments without blocks
    // are also themselves leaders and get nodes.
    std::map<stPinchSegment*, vg::Node*> nodeForLeader;

    std::cerr << "Making pinch graph into vg graph with " << threadSequences.size()
        << " relevant threads" << std::endl;

    // True when the sequence holds at least one character other than N/n.
    // An empty sequence counts as "all N", so the search moves on.
    auto hasNonN = [](const std::string& candidate) {
        return std::any_of(candidate.begin(), candidate.end(), [](char base) {
            return base != 'N' && base != 'n';
        });
    };

    // Adds one edge to the graph. vg::VG deduplicates for us, which matters
    // because every adjacency is visited from both of its segments.
    auto addEdge = [&graph](vg::Node* fromNode, bool fromStart, vg::Node* toNode, bool toEnd) {
        vg::Edge edge;
        edge.set_from(fromNode->id());
        edge.set_from_start(fromStart);
        edge.set_to(toNode->id());
        edge.set_to_end(toEnd);

        graph.add_edge(edge);

#ifdef debug
        std::cerr << "Made edge: " << pb2json(edge) << std::endl;
#endif
    };

    // Pass 1: make a node for every leader.
    // This is the cleverest way to loop over Benedict's iterators.
    auto segmentIterator = stPinchThreadSet_getSegmentIt(threadSet);
    while(auto segment = stPinchThreadSetSegmentIt_getNext(&segmentIterator)) {
        // For every segment, we need to make a VG node for it or its block (if
        // it has one).

#ifdef debug
        std::cerr << "Found segment " << segment << std::endl;
#endif

        // See if the segment is in a block
        auto block = stPinchSegment_getBlock(segment);

        // Get the leader segment: first in the block, or this segment if no block
        auto leader = getLeader(segment);

        if(nodeForLeader.count(leader)) {
            // A node has already been made for this block.
            continue;
        }

        // Otherwise, we need the sequence
        std::string sequence;

        if(block) {
            // Get the sequence by scanning through the block for the first
            // sequence that isn't all Ns, if any.
            auto blockIterator = stPinchBlock_getSegmentIterator(block);
            while(auto sequenceSegment = stPinchBlockIt_getNext(&blockIterator)) {
                auto found = threadSequences.find(stPinchSegment_getName(sequenceSegment));
                if(found == threadSequences.end()) {
                    // This segment is part of a staple. Pass it up
                    continue;
                }

                // Clip out the part of the thread relevant to this segment.
                sequence = found->second.substr(
                    stPinchSegment_getStart(sequenceSegment),
                    stPinchSegment_getLength(sequenceSegment));

                // If necessary, flip the segment around
                if(getOrientation(sequenceSegment)) {
                    sequence = vg::reverse_complement(sequence);
                }

                if(hasNonN(sequence)) {
                    // The sequence has some non-N characters, so use it.
                    break;
                }

                // Otherwise try the next segment
            }
        } else {
            // Just pull the sequence from the lone segment
            sequence = threadSequences.at(stPinchSegment_getName(segment)).substr(
                stPinchSegment_getStart(segment),
                stPinchSegment_getLength(segment));

            // It doesn't need to flip, since it can't be backwards in a block
        }

        // Make a node in the graph to represent the block
        vg::Node* node = graph.create_node(sequence);

        // Remember it
        nodeForLeader[leader] = node;

#ifdef debug
        std::cerr << "Made node: " << pb2json(*node) << std::endl;
#endif
    }

    // Pass 2: go through the segments again and wire them up.
    segmentIterator = stPinchThreadSet_getSegmentIt(threadSet);
    while(auto segment = stPinchThreadSetSegmentIt_getNext(&segmentIterator)) {
        // We know we have a node already, because pass 1 visited this segment.
        auto node = nodeForLeader.at(getLeader(segment));

        // What orientation is this node in for the purposes of this edge
        // TODO: ought to always be false if the segment isn't in a block. Is this true?
        auto orientation = getOrientation(segment);

#ifdef debug
        std::cerr << "Revisited segment: " << segment << " for node " << node->id()
            << " in orientation " << (orientation ? "reverse" : "forward") << std::endl;
#endif

        // Look at the segment 5' of here. We know it's not a staple and
        // thus has a vg node.
        auto prevSegment = stPinchSegment_get5Prime(segment);
        if(prevSegment) {
            // Get the node and orientation
            auto prevNode = nodeForLeader.at(getLeader(prevSegment));
            auto prevOrientation = getOrientation(prevSegment);

#ifdef debug
            std::cerr << "Found prev node " << prevNode->id() << " in orientation "
                << (prevOrientation ? "reverse" : "forward") << std::endl;
#endif

            // Edge runs from the previous node into this one.
            addEdge(prevNode, prevOrientation, node, orientation);
        }

        // Now do the same thing for the 3' side
        auto nextSegment = stPinchSegment_get3Prime(segment);
        if(nextSegment) {
            // Get the node and orientation
            auto nextNode = nodeForLeader.at(getLeader(nextSegment));
            auto nextOrientation = getOrientation(nextSegment);

#ifdef debug
            std::cerr << "Found next node " << nextNode->id() << " in orientation "
                << (nextOrientation ? "reverse" : "forward") << std::endl;
#endif

            // Edge runs from this node into the next one.
            addEdge(node, orientation, nextNode, nextOrientation);
        }
    }

    // Spit out the graph.
    return graph;
}
// Used for interactive debugging. void stCaf_printBlock(stPinchBlock *block) { stPinchBlockIt blockIt = stPinchBlock_getSegmentIterator(block); stPinchSegment *segment; while ((segment = stPinchBlockIt_getNext(&blockIt)) != NULL) { stPinchThread *thread = stPinchSegment_getThread(segment); Cap *cap = flower_getCap(flower, stPinchThread_getName(thread)); Event *event = cap_getEvent(cap); Sequence *sequence = cap_getSequence(cap); printf("%s.%s:%" PRIi64 "-%" PRIi64 ":%s\n", event_getHeader(event), sequence_getHeader(sequence), stPinchSegment_getStart(segment), stPinchSegment_getStart(segment) + stPinchSegment_getLength(segment), stPinchSegment_getBlockOrientation(segment) ? "+" : "-"); } }