コード例 #1
0
ファイル: pileup.cpp プロジェクト: cartoonist/vg
void Pileups::compute_from_alignment(VG& graph, Alignment& alignment) {
    // if we start reversed
    if (alignment.has_path() && alignment.path().mapping(0).position().is_reverse()) {
        alignment = reverse_alignment(alignment,
                                      (function<int64_t(int64_t)>) ([&graph](int64_t id) {
                                          return graph.get_node(id)->sequence().size();
                                          }));
    }
    const Path& path = alignment.path();
    int64_t read_offset = 0;
    for (int i = 0; i < path.mapping_size(); ++i) {
        const Mapping& mapping = path.mapping(i);
        if (graph.has_node(mapping.position().node_id())) {
            const Node* node = graph.get_node(mapping.position().node_id());
            NodePileup* pileup = get_create(node->id());
            int64_t node_offset = mapping.position().offset();
            for (int j = 0; j < mapping.edit_size(); ++j) {
                const Edit& edit = mapping.edit(j);
                // process all pileups in edit.
                // update the offsets as we go
                compute_from_edit(*pileup, node_offset, read_offset, *node,
                                  alignment, mapping, edit);
            }
        }
    }
    assert(alignment.sequence().empty() ||
           alignment.path().mapping_size() == 0 ||
           read_offset == alignment.sequence().length());
}
コード例 #2
0
ファイル: sampler.cpp プロジェクト: adamnovak/vg
Alignment Sampler::alignment_with_error(size_t length,
                                        double base_error,
                                        double indel_error) {
    size_t maxiter = 100;
    Alignment aln;
    if (base_error > 0 || indel_error > 0) {
        // sample a longer-than necessary alignment, then trim
        size_t iter = 0;
        while (iter++ < maxiter) {
            aln = mutate(
                alignment(length + 2 * ((double) length * indel_error)),
                base_error, indel_error);
            if (aln.sequence().size() == length) {
                break;
            } else if (aln.sequence().size() > length) {
                aln = strip_from_end(aln, aln.sequence().size() - length);
                break;
            }
        }
        if (iter == maxiter) {
            cerr << "[vg::Sampler] Warning: could not generate alignment of sufficient length. "
                 << "Graph may be too small, or indel rate too high." << endl;
        }
    } else {
        aln = alignment(length);
    }
    aln.set_identity(identity(aln.path()));
    return aln;
}
コード例 #3
0
ファイル: filter.cpp プロジェクト: cmarkello/vg
    Alignment Filter::soft_clip_filter(Alignment& aln){
        //Find overhangs - portions of the read that
        // are inserted at the ends.
        if (aln.path().mapping_size() > 0){
            Path path = aln.path();
            Edit left_edit = path.mapping(0).edit(0);
            Edit right_edit = path.mapping(path.mapping_size() - 1).edit(path.mapping(path.mapping_size() - 1).edit_size() - 1);
            int left_overhang = left_edit.to_length() - left_edit.from_length();
            int right_overhang = right_edit.to_length() - right_edit.from_length();
            if (left_overhang > soft_clip_limit || right_overhang > soft_clip_limit){
                return inverse ? Alignment() : aln;
            }
            else{
                return inverse ?  aln : Alignment();
            }
        }
        else{
            if (aln.sequence().length() > soft_clip_limit){
                return inverse ? Alignment() : aln;
            }
            cerr << "WARNING: SHORT ALIGNMENT: " << aln.sequence().size() << "bp" << endl
                << "WITH NO MAPPINGS TO REFERENCE" << endl
                << "CONSIDER REMOVING IT FROM ANALYSIS" << endl;
            return inverse ? Alignment() : aln;
        }

    }
コード例 #4
0
ファイル: alignment.cpp プロジェクト: ktym/vg
string alignment_to_sam(const Alignment& alignment,
                        const string& refseq,
                        const int32_t refpos,
                        const string& cigar,
                        const string& mateseq,
                        const int32_t matepos,
                        const int32_t tlen) {
    stringstream sam;

    sam << (!alignment.name().empty() ? alignment.name() : "*") << "\t"
        << sam_flag(alignment) << "\t"
        << (refseq.empty() ? "*" : refseq) << "\t"
        << refpos + 1 << "\t"
        //<< (alignment.path().mapping_size() ? refpos + 1 : 0) << "\t" // positions are 1-based in SAM, 0 means unmapped
        << alignment.mapping_quality() << "\t"
        << (alignment.has_path() && alignment.path().mapping_size() ? cigar : "*") << "\t"
        << (mateseq == refseq ? "=" : mateseq) << "\t"
        << matepos + 1 << "\t"
        << tlen << "\t"
        << (!alignment.sequence().empty() ? alignment.sequence() : "*") << "\t";
    // hack much?
    if (!alignment.quality().empty()) {
        const string& quality = alignment.quality();
        for (int i = 0; i < quality.size(); ++i) {
            sam << quality_short_to_char(quality[i]);
        }
    } else {
        sam << "*";
        //sam << string(alignment.sequence().size(), 'I');
    }
    //<< (alignment.has_quality() ? string_quality_short_to_char(alignment.quality()) : string(alignment.sequence().size(), 'I'));
    if (!alignment.read_group().empty()) sam << "\tRG:Z:" << alignment.read_group();
    sam << "\n";
    return sam.str();
}
コード例 #5
0
ファイル: alignment.cpp プロジェクト: ktym/vg
Alignment merge_alignments(const Alignment& a1, const Alignment& a2, bool debug) {
    //cerr << "overlap is " << overlap << endl;
    // if either doesn't have a path, then treat it like a massive softclip
    if (debug) cerr << "merging alignments " << endl << pb2json(a1) << endl << pb2json(a2) << endl;
    // concatenate them
    Alignment a3;
    a3.set_sequence(a1.sequence() + a2.sequence());
    *a3.mutable_path() = concat_paths(a1.path(), a2.path());
    if (debug) cerr << "merged alignments, result is " << endl << pb2json(a3) << endl;
    return a3;
}
コード例 #6
0
ファイル: gssw_aligner.cpp プロジェクト: JervenBolleman/vg
void QualAdjAligner::align(Alignment& alignment, Graph& g, bool print_score_matrices) {

    gssw_graph* graph = create_gssw_graph(g, 0, nullptr);

    const string& sequence = alignment.sequence();
    const string& quality = alignment.quality();

    if (quality.length() != sequence.length()) {
        cerr << "error:[Aligner] sequence and quality strings different lengths, cannot perform base quality adjusted alignmenterror:[Aligner] sequence and quality strings different lengths, cannot perform base quality adjusted alignment" << endl;
    }

    gssw_graph_fill_qual_adj(graph, sequence.c_str(), quality.c_str(),
                             nt_table, adjusted_score_matrix,
                             scaled_gap_open, scaled_gap_extension, 15, 2);

    gssw_graph_mapping* gm = gssw_graph_trace_back_qual_adj (graph,
                             sequence.c_str(),
                             quality.c_str(),
                             sequence.size(),
                             nt_table,
                             adjusted_score_matrix,
                             scaled_gap_open,
                             scaled_gap_extension);

    gssw_mapping_to_alignment(graph, gm, alignment, print_score_matrices);

#ifdef debug
    gssw_print_graph_mapping(gm, stderr);
#endif

    gssw_graph_mapping_destroy(gm);
    gssw_graph_destroy(graph);
}
コード例 #7
0
ファイル: filter.cpp プロジェクト: cmarkello/vg
    /**
     * Filter reads that are less than <PCTID> reference.
     * I.E. if a read matches the reference along 80% of its
     * length, and your cutoff is 90% PCTID, throw it out.
     */
    Alignment Filter::percent_identity_filter(Alignment& aln){
        double read_pctid = 0.0;
        //read pct_id = len(matching sequence / len(total sequence)

        int64_t aln_total_len = aln.sequence().size();
        int64_t aln_match_len = 0;

        std::function<double(int64_t, int64_t)> calc_pct_id = [](int64_t rp, int64_t ttlp){
            return ((double) rp / (double) ttlp);
        };



        Path path = aln.path();
        //TODO handle reversing mappings

        for (int i = 0; i < path.mapping_size(); i++){
            Mapping mapping = path.mapping(i);

            for (int j = 0; j < mapping.edit_size(); j++){
                Edit ee = mapping.edit(j);
                if (ee.from_length() == ee.to_length() && ee.sequence() == ""){
                    aln_match_len += ee.to_length();
                }

            }
        }
        if (calc_pct_id(aln_match_len, aln_total_len) < min_percent_identity){
            return inverse ? aln : Alignment();
        }

        return inverse ? Alignment() : aln;


    }
コード例 #8
0
ファイル: alignment.cpp プロジェクト: ktym/vg
Alignment strip_from_start(const Alignment& aln, size_t drop) {
    if (!drop) return aln;
    Alignment res;
    res.set_name(aln.name());
    res.set_score(aln.score());
    //cerr << "drop " << drop << " from start" << endl;
    res.set_sequence(aln.sequence().substr(drop));
    if (!aln.has_path()) return res;
    *res.mutable_path() = cut_path(aln.path(), drop).second;
    assert(res.has_path());
    if (alignment_to_length(res) != res.sequence().size()) {
        cerr << "failed!!! drop from start 轰" << endl;
        cerr << pb2json(res) << endl << endl;
        assert(false);
    }
    return res;
}
コード例 #9
0
ファイル: alignment.cpp プロジェクト: ktym/vg
Alignment strip_from_end(const Alignment& aln, size_t drop) {
    if (!drop) return aln;
    Alignment res;
    res.set_name(aln.name());
    res.set_score(aln.score());
    //cerr << "drop " << drop << " from end" << endl;
    size_t cut_at = aln.sequence().size()-drop;
    //cerr << "Cut at " << cut_at << endl;
    res.set_sequence(aln.sequence().substr(0, cut_at));
    if (!aln.has_path()) return res;
    *res.mutable_path() = cut_path(aln.path(), cut_at).first;
    assert(res.has_path());
    if (alignment_to_length(res) != res.sequence().size()) {
        cerr << "failed!!! drop from end 轰" << endl;
        cerr << pb2json(res) << endl << endl;
        assert(false);
    }
    return res;
}
コード例 #10
0
ファイル: alignment.cpp プロジェクト: ktym/vg
Alignment reverse_alignment(const Alignment& aln, const function<int64_t(int64_t)>& node_length) {
    // We're going to reverse the alignment and all its mappings.
    // TODO: should we/can we do this in place?
    
    Alignment reversed = aln;
    reversed.set_sequence(reverse_complement(aln.sequence()));
    
    if(aln.has_path()) {
    
        // Now invert the order of the mappings, and for each mapping, flip the
        // is_reverse flag. The edits within mappings also get put in reverse
        // order, get their positions corrected, and get their sequences get
        // reverse complemented.
        *reversed.mutable_path() = reverse_path(aln.path(), node_length);
    }
    
    return reversed;
}
コード例 #11
0
ファイル: gssw_aligner.cpp プロジェクト: abeconnelly/vg
void GSSWAligner::align(Alignment& alignment) {

    const string& sequence = alignment.sequence();

    gssw_graph_fill(graph, sequence.c_str(),
                    nt_table, score_matrix,
                    gap_open, gap_extension, 15, 2);

    gssw_graph_mapping* gm = gssw_graph_trace_back (graph,
                                                    sequence.c_str(),
                                                    sequence.size(),
                                                    match,
                                                    mismatch,
                                                    gap_open,
                                                    gap_extension);

    gssw_mapping_to_alignment(gm, alignment);

    //gssw_print_graph_mapping(gm);
    gssw_graph_mapping_destroy(gm);

}
コード例 #12
0
ファイル: graph.cpp プロジェクト: mariokostelac/cpppoa
    void Graph::insertSequenceAlignment(const Alignment& alignment,
                                        const string& seq,
                                        const string& label) {
        const string& aln_sequence = alignment.sequence();
        const deque<int>& seq_idxs = alignment.seq_idxs();
        const deque<int>& node_ids = alignment.node_ids();

        int first_id = -1;
        int head_id = -1;
        int tail_id = -1;
        pair<uint32_t, uint32_t> prefix_end_ids;
        pair<uint32_t, uint32_t> suffix_end_ids;

        // because of local alignment prefix or sufix of sequence can be unaligned
        // so we add it directly to graph
        deque<uint32_t> valid_seq_idxs;
        for (auto idx : seq_idxs) {
            if (idx != -1) {
                valid_seq_idxs.emplace_back(idx);
            }
        }
        uint32_t aln_seq_start_idx = valid_seq_idxs.front();
        uint32_t aln_seq_end_idx = valid_seq_idxs.back();

        if (aln_seq_start_idx > 0) {
            prefix_end_ids = addUnmatchedSequence(
                                    aln_sequence.substr(0, aln_seq_start_idx),
                                    label,
                                    false);
            first_id = prefix_end_ids.first;
            head_id = prefix_end_ids.second;
        }

        if (aln_seq_end_idx < aln_sequence.length()) {
            suffix_end_ids = addUnmatchedSequence(
                                    aln_sequence.substr(aln_seq_end_idx + 1),
                                    label,
                                    false);
            tail_id = suffix_end_ids.first;
        }

        // aligned part of sequence
        uint32_t size = max(seq_idxs.size(), node_ids.size());
        for (uint32_t i = 0; i < size; ++i) {
            auto& seq_idx = seq_idxs[i];
            auto& match_id = node_ids[i];

            if (seq_idx == -1) {
                continue;
            }

            int node_id = -1;
            char base = aln_sequence[seq_idx];
            if (match_id == -1) {
                // if sequence base unmatched with graph node add new node
                addNode(base);
                node_id = next_id_ - 1;
            } else if (nodes_[match_id]->base() == base) {
                // if sequence base matched to a node with same base
                node_id = match_id;
            } else {
                // if sequence base matched to a node with different base
                // which is aligned to a node with same base
                int found_node_id = -1;
                for (auto id : nodes_[match_id]->getAlignedIds()) {
                    if (nodes_[id]->base() == base) {
                        found_node_id = id;
                        break;
                    }
                }

                if (found_node_id == -1) {
                    // we didn't find aligned node with same base
                    addNode(base);
                    node_id = next_id_ - 1;
                    // add all aligned to nodes to newly created node
                    for (auto id : nodes_[match_id]->getAlignedIds()) {
                        nodes_[node_id]->addAlignedNode(id);
                    }
                    nodes_[node_id]->addAlignedNode(match_id);

                    // to nodes aligned to newly created node add this node
                    // as aligned to
                    for (auto id : nodes_[node_id]->getAlignedIds()) {
                        nodes_[id]->addAlignedNode(node_id);
                    }
                } else {
                    // node id is found node id
                    node_id = found_node_id;
                }
            }

            if (head_id != -1 && node_id != -1) {
                addEdge(head_id, node_id, label);
            }
            head_id = node_id;
            if (first_id == -1) {
                first_id = head_id;
            }
        }

        // connect aligned part with unaligned suffix
        if (head_id != -1 && tail_id != -1) {
            addEdge(head_id, tail_id, label);
        }

        // resort nodes order
        topological_sort();

        sequences_.emplace_back(seq);
        labels_.emplace_back(label);
        start_ids_.emplace_back(first_id);
    }
コード例 #13
0
ファイル: utility.hpp プロジェクト: vgteam/vg
/// Make seeds for Alignments based on their sequences.
inline string make_shuffle_seed(const Alignment& aln) {
    return aln.sequence();
}
コード例 #14
0
ファイル: gssw_aligner.cpp プロジェクト: JervenBolleman/vg
void Aligner::align_internal(Alignment& alignment, vector<Alignment>* multi_alignments, Graph& g,
                             int64_t pinned_node_id, bool pin_left, int32_t max_alt_alns, bool print_score_matrices) {

    // check input integrity
    if (pin_left && !pinned_node_id) {
        cerr << "error:[Aligner] cannot choose pinned end in non-pinned alignment" << endl;
        exit(EXIT_FAILURE);
    }
    if (multi_alignments && !pinned_node_id) {
        cerr << "error:[Aligner] multiple traceback is not valid in local alignment, only pinned and global" << endl;
        exit(EXIT_FAILURE);
    }
    if (!(multi_alignments) && max_alt_alns != 1) {
        cerr << "error:[Aligner] cannot specify maximum number of alignments in single alignment" << endl;
        exit(EXIT_FAILURE);
    }


    // alignment pinning algorithm is based on pinning in bottom right corner, if pinning in top
    // left we need to reverse all the sequences first and translate the alignment back later

    // create reversed objects if necessary
    Graph reversed_graph;
    string reversed_sequence;
    if (pin_left) {
        reversed_sequence.resize(alignment.sequence().length());

        reverse_copy(alignment.sequence().begin(), alignment.sequence().end(), reversed_sequence.begin());
        reverse_graph(g, reversed_graph);
    }

    // choose forward or reversed objects
    Graph* align_graph;
    string* align_sequence;
    if (pin_left) {
        align_graph = &reversed_graph;
        align_sequence = &reversed_sequence;
    }
    else {
        align_graph = &g;
        align_sequence = alignment.mutable_sequence();
    }

    // convert into gssw graph and get the counterpart to pinned node (if pinning)
    gssw_node* pinned_node = nullptr;
    gssw_graph* graph = create_gssw_graph(*align_graph, pinned_node_id, &pinned_node);

    if (pinned_node_id & !pinned_node) {
        cerr << "error:[Aligner] pinned node for pinned alignment is not in graph" << endl;
        exit(EXIT_FAILURE);
    }

    // perform dynamic programming
    gssw_graph_fill(graph, (*align_sequence).c_str(),
                    nt_table, score_matrix,
                    gap_open, gap_extension, 15, 2);

    // traceback either from pinned position or optimal local alignment
    if (pinned_node) {
        // trace back pinned alignment
        gssw_graph_mapping** gms = gssw_graph_trace_back_pinned_multi (graph,
                                   pinned_node,
                                   max_alt_alns,
                                   (*align_sequence).c_str(),
                                   (*align_sequence).size(),
                                   nt_table,
                                   score_matrix,
                                   gap_open,
                                   gap_extension);

        if (pin_left) {
            // translate graph and mappings into original node space
            unreverse_graph(reversed_graph);
            for (int32_t i = 0; i < max_alt_alns; i++) {
                unreverse_graph_mapping(gms[i]);
            }
        }

        // convert optimal alignment and store it in the input Alignment object (in the multi alignment,
        // this will have been set to the first in the vector)
        if (gms[0]->score > 0) {
            // have a mapping, can just convert normally
            gssw_mapping_to_alignment(graph, gms[0], alignment, print_score_matrices);
        }
        else {
            // gssw will not identify mappings with 0 score, infer location based on pinning

            Mapping* mapping = alignment.mutable_path()->add_mapping();
            mapping->set_rank(1);

            // locate at the end of the node
            Position* position = mapping->mutable_position();
            position->set_node_id(pinned_node_id);
            position->set_offset(pin_left ? 0 : pinned_node->len);

            // soft clip
            Edit* edit = mapping->add_edit();
            edit->set_to_length(alignment.sequence().length());
            edit->set_sequence(alignment.sequence());
        }


        if (multi_alignments) {
            // determine how many non-null alignments were returned
            int32_t num_non_null = max_alt_alns;
            for (int32_t i = 1; i < max_alt_alns; i++) {
                if (gms[i]->score <= 0) {
                    num_non_null = i;
                    break;
                }
            }

            // reserve to avoid illegal access errors that occur when the vector reallocates
            multi_alignments->reserve(num_non_null);

            // copy the primary alignment
            multi_alignments->emplace_back(alignment);

            // convert the alternate alignments and store them at the back of the vector (this will not
            // execute if we are doing single alignment)
            for (int32_t i = 1; i < num_non_null; i++) {
                gssw_graph_mapping* gm = gms[i];

                // make new alignment object
                multi_alignments->emplace_back();
                Alignment& next_alignment = multi_alignments->back();

                // copy over sequence information from the primary alignment
                next_alignment.set_sequence(alignment.sequence());
                next_alignment.set_quality(alignment.quality());

                // get path of the alternate alignment
                gssw_mapping_to_alignment(graph, gm, next_alignment, print_score_matrices);

            }
        }

        for (int32_t i = 0; i < max_alt_alns; i++) {
            gssw_graph_mapping_destroy(gms[i]);
        }
        free(gms);
    }
    else {
        // trace back local alignment
        gssw_graph_mapping* gm = gssw_graph_trace_back (graph,
                                 (*align_sequence).c_str(),
                                 (*align_sequence).size(),
                                 nt_table,
                                 score_matrix,
                                 gap_open,
                                 gap_extension);

        gssw_mapping_to_alignment(graph, gm, alignment, print_score_matrices);
        gssw_graph_mapping_destroy(gm);
    }

    //gssw_graph_print_score_matrices(graph, sequence.c_str(), sequence.size(), stderr);

    gssw_graph_destroy(graph);

}