Ejemplo n.º 1
0
// Upper bound on the number of pairwise alignments that could back up
// this block. A plain (degree choose 2) would overcount, because
// outgroups are never aligned against themselves or each other.
static uint64_t numPossibleSupportingHomologies(stPinchBlock *block, Flower *flower) {
    uint64_t ingroupCount = 0;
    uint64_t outgroupCount = 0;

    // Classify every segment of the block by whether its event is an outgroup.
    stPinchBlockIt blockIt = stPinchBlock_getSegmentIterator(block);
    for (stPinchSegment *seg = stPinchBlockIt_getNext(&blockIt);
         seg != NULL;
         seg = stPinchBlockIt_getNext(&blockIt)) {
        Event *segEvent = cap_getEvent(flower_getCap(flower, stPinchSegment_getName(seg)));
        if (!event_isOutgroup(segEvent)) {
            ingroupCount++;
        } else {
            outgroupCount++;
        }
    }

    // Every segment must have landed in exactly one of the two buckets.
    assert(outgroupCount + ingroupCount == stPinchBlock_getDegree(block));

    // Ingroups are aligned all-against-all, so any given ingroup-ingroup
    // homology may be observed as many as two times; each
    // ingroup-outgroup pairing is counted once.
    return ingroupCount * outgroupCount + choose2(ingroupCount) * 2;
}
Ejemplo n.º 2
0
/**
 * Create a VG graph from a pinch thread set.
 *
 * One vg node is created per "leader" segment, where the leader is whatever
 * getLeader() returns for a segment (per the notes below: the first segment
 * of its block, or the segment itself when it is not in a block). Edges are
 * then added between the nodes of every pair of segments that are adjacent
 * on a thread.
 *
 * @param threadSet       the pinch thread set to convert.
 * @param threadSequences sequence string for each thread, keyed by thread
 *                        name. Segments of a block whose thread name is not
 *                        in this map are skipped when choosing the block's
 *                        sequence. A segment that is not in a block must be
 *                        present, otherwise std::out_of_range is thrown.
 * @return the constructed graph.
 *
 * Node sequence selection for a block: the first segment (in block iteration
 * order) whose sequence is not made up entirely of 'N'/'n' supplies the
 * sequence. If every candidate is all Ns, the last candidate examined is
 * used; if the block has no candidates at all, the node gets an empty
 * sequence.
 */
vg::VG pinchToVG(stPinchThreadSet* threadSet, std::map<int64_t, std::string>& threadSequences) {
    // Make an empty graph
    vg::VG graph;
    
    // Remember what nodes have been created for what segments. Only the first
    // segment in a block (the "leader") gets a node. Segments without blocks
    // are also themselves leaders and get nodes.
    std::map<stPinchSegment*, vg::Node*> nodeForLeader;
    
    std::cerr << "Making pinch graph into vg graph with " << threadSequences.size() << " relevant threads" << std::endl;
    
    // The getNext call yields a null pointer once the iterator is exhausted,
    // so it can drive the loop condition directly.
    auto segmentIterator = stPinchThreadSet_getSegmentIt(threadSet);
    while(auto segment = stPinchThreadSetSegmentIt_getNext(&segmentIterator)) {
        // For every segment, we need to make a VG node for it or its block (if
        // it has one).
        
#ifdef debug
        std::cerr << "Found segment " << segment << std::endl;
#endif
        
        // See if the segment is in a block
        auto block = stPinchSegment_getBlock(segment);
        
        // Get the leader segment: first in the block, or this segment if no block
        auto leader = getLeader(segment);
        
        if(nodeForLeader.count(leader)) {
            // A node has already been made for this block.
            continue;
        }
        
        // Otherwise, we need the sequence
        std::string sequence;
        
        if(block) {
            // Get the sequence by scanning through the block for the first sequence
            // that isn't all Ns, if any.
            auto blockIterator = stPinchBlock_getSegmentIterator(block);
            while(auto sequenceSegment = stPinchBlockIt_getNext(&blockIterator)) {
                // Single lookup: both tests for presence and fetches the sequence.
                auto found = threadSequences.find(stPinchSegment_getName(sequenceSegment));
                if(found == threadSequences.end()) {
                    // This segment is part of a staple. Pass it up
                    continue;
                }
                
                // Clip out the part of the thread's sequence relevant to this segment.
                sequence = found->second.substr(
                    stPinchSegment_getStart(sequenceSegment), stPinchSegment_getLength(sequenceSegment));
                    
                // If necessary, flip the segment around
                if(getOrientation(sequenceSegment)) {
                    sequence = vg::reverse_complement(sequence);
                }
                
                // An empty sequence counts as "all Ns" here, so we keep looking.
                const bool allNs = std::all_of(sequence.begin(), sequence.end(), [](char base) {
                    return base == 'N' || base == 'n';
                });
                
                if(!allNs) {
                    // The sequence has some non-N characters, so use it.
                    break;
                }
                
                // Otherwise try the next segment
            }
        } else {
            // Just pull the sequence from the lone segment
            sequence = threadSequences.at(stPinchSegment_getName(segment)).substr(
                stPinchSegment_getStart(segment), stPinchSegment_getLength(segment));
                
            // It doesn't need to flip, since it can't be backwards in a block
        }
        
        // Make a node in the graph to represent the block
        vg::Node* node = graph.create_node(sequence);
        
        // Remember it
        nodeForLeader[leader] = node;
#ifdef debug
        std::cerr << "Made node: " << pb2json(*node) << std::endl;
#endif
            
    }
    
    // Now go through the segments again and wire them up.
    segmentIterator = stPinchThreadSet_getSegmentIt(threadSet);
    while(auto segment = stPinchThreadSetSegmentIt_getNext(&segmentIterator)) {
        // Get the leader segment: first in the block, or this segment if no block
        auto leader = getLeader(segment);
        
        // We know we have a node already, since the first pass made one for
        // every leader.
        auto node = nodeForLeader.at(leader);
        
        // What orientation is this node in for the purposes of this edge
        // TODO: ought to always be false if the segment isn't in a block. Is this true?
        auto orientation = getOrientation(segment);
#ifdef debug
        std::cerr << "Revisited segment: " << segment << " for node " << node->id() <<
            " in orientation " << (orientation ? "reverse" : "forward") << std::endl;
#endif
        
        // Look at the segment 5' of here. We know it's not a staple and
        // thus has a vg node.
        auto prevSegment = stPinchSegment_get5Prime(segment);
        
        if(prevSegment) {
            // Get the node IDs and orientations
            auto prevNode = nodeForLeader.at(getLeader(prevSegment));
            auto prevOrientation = getOrientation(prevSegment);
#ifdef debug
            std::cerr << "Found prev node " << prevNode->id() << " in orientation " << 
                (prevOrientation ? "reverse" : "forward") << std::endl;
#endif
            
            // Make an edge
            vg::Edge prevEdge;
            prevEdge.set_from(prevNode->id());
            prevEdge.set_from_start(prevOrientation);
            prevEdge.set_to(node->id());
            prevEdge.set_to_end(orientation);
            
            // Add it in. vg::VG deduplicates for us
            graph.add_edge(prevEdge);
#ifdef debug
            std::cerr << "Made edge: " << pb2json(prevEdge) << std::endl;
#endif
        }
        
        // Now do the same thing for the 3' side. Each adjacency is therefore
        // submitted from both of its segments; we rely on the graph to
        // deduplicate.
        auto nextSegment = stPinchSegment_get3Prime(segment);
        
        if(nextSegment) {
            // Get the node IDs and orientations
            auto nextNode = nodeForLeader.at(getLeader(nextSegment));
            auto nextOrientation = getOrientation(nextSegment);
#ifdef debug
            std::cerr << "Found next node " << nextNode->id() << " in orientation " << 
                (nextOrientation ? "reverse" : "forward") << std::endl;
#endif
            
            // Make an edge
            vg::Edge nextEdge;
            nextEdge.set_from(node->id());
            nextEdge.set_from_start(orientation);
            nextEdge.set_to(nextNode->id());
            nextEdge.set_to_end(nextOrientation);
            
            // Add it in. vg::VG deduplicates for us
            graph.add_edge(nextEdge);
#ifdef debug
            std::cerr << "Made edge: " << pb2json(nextEdge) << std::endl;
#endif
        }
    }
    
    // Spit out the graph.
    return graph;

}