Пример #1
0
// Build a new Alignment by joining a1 and a2 end to end: the result's sequence
// is a1's sequence followed by a2's, and its path is whatever concat_paths()
// produces from the two input paths. When debug is set, the inputs and the
// result are dumped to stderr as JSON.
Alignment merge_alignments(const Alignment& a1, const Alignment& a2, bool debug) {
    if (debug) {
        cerr << "merging alignments " << endl << pb2json(a1) << endl << pb2json(a2) << endl;
    }

    Alignment merged;
    // Sequence first, then the path, mirroring the concatenation order.
    merged.set_sequence(a1.sequence() + a2.sequence());
    *merged.mutable_path() = concat_paths(a1.path(), a2.path());

    if (debug) {
        cerr << "merged alignments, result is " << endl << pb2json(merged) << endl;
    }
    return merged;
}
Пример #2
0
// Apply a batch of translations to this index.
//
// Each input translation must have at least one "from" mapping and exactly one
// "to" mapping; otherwise the translation is printed to stderr and a
// runtime_error is thrown. The translations are regrouped by original node ID
// into one "covering" translation per node, and each of those is handed to
// apply_translation().
void PathIndex::apply_translations(const vector<Translation>& translations) {
    // Convert from normal to partitioning translations
    
    // For each original node ID, we keep a vector of pairs of from mapping and
    // to mapping. We only keep pairs where the from mapping isn't empty.
    map<id_t, vector<pair<Mapping, Mapping>>> collated;
    
    for (auto& t : translations) {
        if (t.from().mapping_size() < 1 || t.to().mapping_size() != 1) {
            // Ensure the translations are the format we expect. They always have
            // at least one from mapping (but maybe an insert too) and exactly 1
            // to mapping.
            cerr << "error:[vg::PathIndex] Bad translation: " << pb2json(t) << endl;
            throw runtime_error("Translation not in VG::edit() format");
        }
        
        if (mapping_from_length(t.from().mapping(0)) == 0) {
            // This is a novel node and can't be on our path
            continue;
        }
        
        if (t.from().mapping(0).position().is_reverse()) {
            // Wait for the forward-orientation version
            continue;
        }
        
        // Stick the from and to mappings in the list for the from node
        collated[t.from().mapping(0).position().node_id()].push_back(make_pair(t.from().mapping(0), t.to().mapping(0)));
    }
    
    for (auto& kv : collated) {
        // For every original node and its replacement nodes
        
        // Sort the replacement mappings by where they start in the original
        // node. The comparator must be a strict weak ordering (strictly "less
        // than"); using <= here would be undefined behavior in std::sort.
        std::sort(kv.second.begin(), kv.second.end(), [](const pair<Mapping, Mapping>& a, const pair<Mapping, Mapping>& b) {
            // Return true if the a pair belongs before the b pair along the path through the original node
            return a.first.position().offset() < b.first.position().offset();
        });
        
        // Make a new translation to cover the original node
        Translation covering;
        
        for (const auto& mapping_pair : kv.second) {
            // Split across these parts of new nodes
            *(covering.mutable_to()->add_mapping()) = mapping_pair.second;
        }
        
        // Just assume we take up the whole original node
        auto* from_mapping = covering.mutable_from()->add_mapping();
        from_mapping->mutable_position()->set_node_id(kv.first);
        // Give it a full length perfect match: from and to lengths both equal
        // the from-length of the replacement path we just assembled.
        auto* from_edit = from_mapping->add_edit();
        from_edit->set_from_length(path_from_length(covering.to()));
        from_edit->set_to_length(from_edit->from_length());
        
        // Apply this (single node) translation.
        // TODO: batch up a bit?
        apply_translation(covering);
    }
}
Пример #3
0
// Write the call graph to `out`: as JSON (via pb2json on the underlying graph
// message) when `json` is true, otherwise through the graph's own
// serialize_to_ostream().
void Caller::write_call_graph(ostream& out, bool json) {
    if (!json) {
        _call_graph.serialize_to_ostream(out);
        return;
    }
    out << pb2json(_call_graph.graph);
}
Пример #4
0
// Pull objects from the reader returned by get_read_fn() until it reports
// failure, and write them to `out` in batches. A batch is flushed once it holds
// at least `buf_size` objects, and once more when the reader is exhausted.
// With json_out set, each object is written via pb2json; otherwise batches go
// through stream::write and the stream is closed with stream::finish.
// Returns the total number of objects written.
inline int64_t JSONStreamHelper<T>::write(std::ostream& out, bool json_out,
                                          int64_t buf_size) {
    std::function<bool(T&)> read_next = get_read_fn();
    std::vector<T> pending;
    int64_t written = 0;

    // Accessor handed to stream::write so it can fetch the i-th pending object.
    std::function<T(size_t)> pending_at = [&](size_t i) -> T {return pending[i];};

    bool more = true;
    while (more) {
        T item;
        more = read_next(item);
        if (more) {
            pending.push_back(item);
        }

        // Flush when the input is exhausted or the batch is full.
        const bool should_flush = !more || pending.size() >= buf_size;
        if (!should_flush) {
            continue;
        }

        if (json_out) {
            for (const T& buffered : pending) {
                out << pb2json(buffered);
            }
        } else {
            stream::write(out, pending.size(), pending_at);
        }
        written += pending.size();
        pending.clear();
    }

    if (!json_out) {
        stream::finish(out);
    }

    out.flush();
    return written;
}
Пример #5
0
// Return a copy of `aln` with the first `drop` sequence characters removed.
// A drop of zero returns `aln` unchanged. Otherwise only the name, score,
// trimmed sequence and (if present) trimmed path are carried into the result;
// the path is the second half of cut_path(aln.path(), drop). If the trimmed
// path's to-length disagrees with the trimmed sequence length, the result is
// dumped to stderr and an assertion fires.
Alignment strip_from_start(const Alignment& aln, size_t drop) {
    if (drop == 0) {
        return aln;
    }

    Alignment stripped;
    stripped.set_name(aln.name());
    stripped.set_score(aln.score());
    stripped.set_sequence(aln.sequence().substr(drop));

    if (aln.has_path()) {
        *stripped.mutable_path() = cut_path(aln.path(), drop).second;
        assert(stripped.has_path());

        // Sanity check: the path must account for exactly the kept sequence.
        const bool lengths_agree = alignment_to_length(stripped) == stripped.sequence().size();
        if (!lengths_agree) {
            cerr << "failed!!! drop from start 轰" << endl;
            cerr << pb2json(stripped) << endl << endl;
            assert(false);
        }
    }

    return stripped;
}
Пример #6
0
// Return a copy of `aln` with the last `drop` sequence characters removed.
// A drop of zero returns `aln` unchanged. Otherwise only the name, score,
// trimmed sequence and (if present) trimmed path are carried into the result;
// the path is the first half of cut_path(aln.path(), keep_length). If the
// trimmed path's to-length disagrees with the trimmed sequence length, the
// result is dumped to stderr and an assertion fires.
Alignment strip_from_end(const Alignment& aln, size_t drop) {
    if (drop == 0) {
        return aln;
    }

    Alignment stripped;
    stripped.set_name(aln.name());
    stripped.set_score(aln.score());

    // Number of leading characters to keep.
    // NOTE(review): unsigned subtraction; wraps if drop exceeds the sequence
    // length -- callers presumably guarantee drop <= length, TODO confirm.
    const size_t keep_length = aln.sequence().size() - drop;
    stripped.set_sequence(aln.sequence().substr(0, keep_length));

    if (aln.has_path()) {
        *stripped.mutable_path() = cut_path(aln.path(), keep_length).first;
        assert(stripped.has_path());

        // Sanity check: the path must account for exactly the kept sequence.
        const bool lengths_agree = alignment_to_length(stripped) == stripped.sequence().size();
        if (!lengths_agree) {
            cerr << "failed!!! drop from end 轰" << endl;
            cerr << pb2json(stripped) << endl << endl;
            assert(false);
        }
    }

    return stripped;
}
Пример #7
0
/**
 * Create a VG graph from a pinch thread set.
 *
 * Works in two passes over every segment in threadSet:
 *   1. Create one vg node per "leader" segment (as chosen by getLeader()),
 *      taking the node sequence from threadSequences.
 *   2. For each segment, add edges from its 5' neighbor and to its 3'
 *      neighbor, using getOrientation() to pick the node sides.
 *
 * threadSequences maps a thread name to that thread's full sequence. Block
 * segments whose thread name is absent from the map are skipped when looking
 * for a node sequence. Lookups for unblocked segments use at() and so throw
 * std::out_of_range if the thread is missing.
 *
 * Returns the constructed graph by value.
 */
vg::VG pinchToVG(stPinchThreadSet* threadSet, std::map<int64_t, std::string>& threadSequences) {
    // Make an empty graph
    vg::VG graph;
    
    // Remember what nodes have been created for what segments. Only the first
    // segment in a block (the "leader") gets a node. Segments without blocks
    // are also themselves leaders and get nodes.
    std::map<stPinchSegment*, vg::Node*> nodeForLeader;
    
    std::cerr << "Making pinch graph into vg graph with " << threadSequences.size() << " relevant threads" << std::endl;
    
    // Pass 1: make a node for every leader segment.
    auto segmentIterator = stPinchThreadSet_getSegmentIt(threadSet);
    while(auto segment = stPinchThreadSetSegmentIt_getNext(&segmentIterator)) {
        // For every segment, we need to make a VG node for it or its block (if
        // it has one).
        
#ifdef debug
        std::cerr << "Found segment " << segment << std::endl;
#endif
        
        // See if the segment is in a block
        auto block = stPinchSegment_getBlock(segment);
        
        // Get the leader segment: first in the block, or this segment if no block
        auto leader = getLeader(segment);
        
        if(nodeForLeader.count(leader)) {
            // A node has already been made for this block.
            continue;
        }
        
        // Otherwise, we need the sequence
        std::string sequence;
        
        if(block) {
            // Get the sequence by scanning through the block for the first sequence
            // that isn't all Ns, if any. If every candidate is all Ns, the last
            // one examined is kept; if there are no candidates, the sequence
            // stays empty.
            auto blockIterator = stPinchBlock_getSegmentIterator(block);
            while(auto sequenceSegment = stPinchBlockIt_getNext(&blockIterator)) {
                if(!threadSequences.count(stPinchSegment_getName(sequenceSegment))) {
                    // This segment is part of a staple. Pass it up
                    continue;
                }
                
                // Go get the sequence of the thread, and clip out the part relevant to this segment.
                sequence = threadSequences.at(stPinchSegment_getName(sequenceSegment)).substr(
                    stPinchSegment_getStart(sequenceSegment), stPinchSegment_getLength(sequenceSegment));
                    
                // If necessary, flip the segment around
                if(getOrientation(sequenceSegment)) {
                    sequence = vg::reverse_complement(sequence);
                }
                
                // Count masked bases in either case
                const size_t nCount = std::count(sequence.begin(), sequence.end(), 'N') +
                    std::count(sequence.begin(), sequence.end(), 'n');
                
                if(nCount < sequence.size()) {
                    // The sequence has some non-N characters, so use it
                    break;
                }
                
                // Otherwise try the next segment
            }
        } else {
            // Just pull the sequence from the lone segment
            sequence = threadSequences.at(stPinchSegment_getName(segment)).substr(
                stPinchSegment_getStart(segment), stPinchSegment_getLength(segment));
                
            // It doesn't need to flip, since it can't be backwards in a block
        }
        
        // Make a node in the graph to represent the block
        vg::Node* node = graph.create_node(sequence);
        
        // Remember it
        nodeForLeader[leader] = node;
#ifdef debug
        std::cerr << "Made node: " << pb2json(*node) << std::endl;
#endif
            
    }
    
    // Pass 2: go through the segments again and wire them up.
    segmentIterator = stPinchThreadSet_getSegmentIt(threadSet);
    while(auto segment = stPinchThreadSetSegmentIt_getNext(&segmentIterator)) {
        // Get the leader segment: first in the block, or this segment if no block
        auto leader = getLeader(segment);
        
        // We know we have a node already
        auto node = nodeForLeader.at(leader);
        
        // What orientation is this node in for the purposes of this edge
        // TODO: ought to always be false if the segment isn't in a block. Is this true?
        auto orientation = getOrientation(segment);
#ifdef debug
        std::cerr << "Revisited segment: " << segment << " for node " << node->id() <<
            " in orientation " << (orientation ? "reverse" : "forward") << std::endl;
#endif
        
        // Look at the segment 5' of here. We know it's not a staple and
        // thus has a vg node.
        auto prevSegment = stPinchSegment_get5Prime(segment);
        
        if(prevSegment) {
            // Get the node IDs and orientations
            auto prevNode = nodeForLeader.at(getLeader(prevSegment));
            auto prevOrientation = getOrientation(prevSegment);
#ifdef debug
            std::cerr << "Found prev node " << prevNode->id() << " in orientation " << 
                (prevOrientation ? "reverse" : "forward") << std::endl;
#endif
            
            // Make an edge
            vg::Edge prevEdge;
            prevEdge.set_from(prevNode->id());
            prevEdge.set_from_start(prevOrientation);
            prevEdge.set_to(node->id());
            prevEdge.set_to_end(orientation);
            
            // Add it in. vg::VG deduplicates for us
            graph.add_edge(prevEdge);
#ifdef debug
            std::cerr << "Made edge: " << pb2json(prevEdge) << std::endl;
#endif
        }
        
        // Now do the same thing for the 3' side
        auto nextSegment = stPinchSegment_get3Prime(segment);
        
        if(nextSegment) {
            // Get the node IDs and orientations
            auto nextNode = nodeForLeader.at(getLeader(nextSegment));
            auto nextOrientation = getOrientation(nextSegment);
#ifdef debug
            std::cerr << "Found next node " << nextNode->id() << " in orientation " << 
                (nextOrientation ? "reverse" : "forward") << std::endl;
#endif
            
            // Make an edge
            vg::Edge nextEdge;
            nextEdge.set_from(node->id());
            nextEdge.set_from_start(orientation);
            nextEdge.set_to(nextNode->id());
            nextEdge.set_to_end(nextOrientation);
            
            // Add it in. vg::VG deduplicates for us
            graph.add_edge(nextEdge);
#ifdef debug
            std::cerr << "Made edge: " << pb2json(nextEdge) << std::endl;
#endif
        }
    }
    
    // Spit out the graph.
    return graph;

}
Пример #8
0
// Split a translation into, for each old ("from") node ID, the list of new
// ("to") mappings that partition it.
//
// The "to" mappings are consumed in order: each "from" mapping takes at least
// one, and keeps taking more until the accumulated "to" from-length catches up
// with the accumulated "from" from-length. For a reverse-strand "from" mapping,
// each taken mapping has its is_reverse flag flipped and the node's list is
// reversed.
//
// Throws std::runtime_error if the "to" path runs out of mappings before every
// "from" mapping has received at least one.
map<id_t, vector<Mapping>> PathIndex::parse_translation(const Translation& translation) {

    // We take as a precondition that the translation is replacing a set of old
    // nodes each with a nonempty set of new nodes. So we won't have to combine
    // nodes or parts of nodes.
    
#ifdef debug
    cerr << "Partitioning translation: " << pb2json(translation) << endl;
#endif
    
    // We'll populate this with the mappings that partition each old node.
    map<id_t, vector<Mapping>> old_node_to_new_nodes;
    
    // We know the new Mappings are conceptually nested in the old Mappings, so
    // we can use nested loops.

    // Mapping counts, hoisted so index comparisons are unsigned-to-unsigned.
    const size_t from_count = translation.from().mapping_size();
    const size_t to_count = translation.to().mapping_size();

    // How many bases in the old and new paths are accounted for?
    size_t old_bases = 0;
    size_t new_bases = 0;

    // This represents our index in the new path
    size_t j = 0;    
    
    for(size_t i = 0; i < from_count; i++) {
        // For every old mapping
        auto& from_mapping = translation.from().mapping(i);
        
        if (j >= to_count) {
            // The precondition is violated: there is no new mapping left for
            // this old mapping. Fail loudly instead of indexing out of range.
            cerr << "error:[vg::PathIndex] Translation has too few to mappings: " << pb2json(translation) << endl;
            throw std::runtime_error("Translation does not give every from mapping a to mapping");
        }
        
        // Count up its bases
        old_bases += mapping_from_length(from_mapping);
        
        // Grab a reference to the list of replacement mappings
        auto& replacements = old_node_to_new_nodes[from_mapping.position().node_id()];
        
        // We know the old mapping must have at least one new mapping in it
        do {
            // Copy the next new mapping straight into the list for this from
            // node, then adjust it in place.
            replacements.push_back(translation.to().mapping(j));
            auto& to_mapping = replacements.back();
            
            if (from_mapping.position().is_reverse()) {
                // Flip its strand if the mapping we're partitioning is backward
                to_mapping.mutable_position()->set_is_reverse(!to_mapping.position().is_reverse());
            }
            
            // Account for its bases
            new_bases += mapping_from_length(to_mapping);
            
            // Look at the next to mapping
            j++;
        } while (j < to_count && new_bases < old_bases);
        
        if (from_mapping.position().is_reverse()) {
            // Flip the order of the replacement mappings around
            // NOTE(review): this reverses the node's whole list, including
            // anything added by an earlier from mapping on the same node.
            reverse(replacements.begin(), replacements.end());
        }
        
#ifdef debug
        cerr << "Old node " << from_mapping.position().node_id() << " "
            << from_mapping.position().is_reverse() << " becomes: " << endl;
        for(auto& m : old_node_to_new_nodes[from_mapping.position().node_id()]) {
            cerr << "\t" << pb2json(m) << endl;
        }
#endif
    }
    
    return old_node_to_new_nodes;

}