Exemple #1
0
    /**
     * Filter reads whose percent identity to the reference is below the
     * configured cutoff (min_percent_identity).
     *
     * Percent identity is the number of read bases covered by exact-match
     * edits (from_length == to_length with an empty replacement sequence)
     * divided by the length of the read's sequence. I.E. if a read matches
     * the reference along 80% of its length, and your cutoff is 90% PCTID,
     * throw it out.
     *
     * A read with an empty sequence is treated as 0% identity. Dividing
     * 0 by 0 would otherwise yield NaN, which compares false against the
     * cutoff and lets the read pass silently.
     *
     * @param aln the alignment to test; it is not modified.
     * @return aln if it meets the cutoff, otherwise an empty Alignment.
     *         When `inverse` is set the two outcomes are swapped.
     */
    Alignment Filter::percent_identity_filter(Alignment& aln){
        const int64_t aln_total_len = aln.sequence().size();
        int64_t aln_match_len = 0;

        // Bind by reference: copying the protobuf messages on every
        // iteration is pure overhead since they are only read.
        const Path& path = aln.path();
        //TODO handle reversing mappings

        for (int i = 0; i < path.mapping_size(); i++){
            const Mapping& mapping = path.mapping(i);

            for (int j = 0; j < mapping.edit_size(); j++){
                const Edit& ee = mapping.edit(j);
                // Equal lengths and no replacement sequence == exact match.
                if (ee.from_length() == ee.to_length() && ee.sequence().empty()){
                    aln_match_len += ee.to_length();
                }
            }
        }

        // read pct_id = len(matching sequence) / len(total sequence)
        const double read_pctid = (aln_total_len == 0)
            ? 0.0
            : static_cast<double>(aln_match_len) / static_cast<double>(aln_total_len);

        if (read_pctid < min_percent_identity){
            return inverse ? aln : Alignment();
        }

        return inverse ? Alignment() : aln;
    }
Exemple #2
0
/**
 * Build a per-entity identity vector for an alignment.
 *
 * The result has one slot per graph entity (node_count + edge_count of the
 * xg index), all initialised to 0.0. For every mapping that has a position,
 * the slot of its node is set to the fraction of the mapping's reference
 * length (sum of from_length) that is covered by exact-match edits
 * (from_length == to_length with an empty replacement sequence). For every
 * pair of consecutive positioned mappings whose nodes are joined by an edge
 * in the xg index, the slot of that edge is set to 1.0.
 *
 * Entity ranks returned by the xg index are used as 1-based indices here
 * (hence the `- 1` when writing into the vector).
 *
 * NOTE(review): edges are looked up with both boolean arguments set to
 * false — presumably orientation flags, in which case reverse traversals
 * are never marked. Confirm against the xg API.
 *
 * @param a the alignment to vectorize.
 * @return the identity vector described above.
 */
vector<double> Vectorizer::alignment_to_identity_hot(Alignment a){
    const int64_t entity_size = my_xg->node_count + my_xg->edge_count;
    vector<double> ret(entity_size, 0.0);

    // Bind by reference: the protobuf messages are only read, so copying
    // them on every iteration is unnecessary.
    const Path& path = a.path();
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        if (!mapping.has_position()){
            continue;
        }
        const int64_t node_id = mapping.position().node_id();
        const int64_t key = my_xg->node_rank_as_entity(node_id);

        //Calculate % identity by walking the edits and counting matches.
        double match_len = 0.0;
        double total_len = 0.0;

        for (int j = 0; j < mapping.edit_size(); j++){
            const Edit& e = mapping.edit(j);
            total_len += e.from_length();
            if (e.from_length() == e.to_length() && e.sequence().empty()){
                match_len += static_cast<double>(e.to_length());
            }
            // TODO if we map but don't match exactly, add half the average length to match_length
            //match_len += (double) (0.5 * ((double) e.to_length()));
        }

        // A mapping with no reference length has no meaningful identity;
        // report 0 instead of dividing by zero.
        ret[key - 1] = (total_len == 0.0) ? 0.0 : (match_len / total_len);

        if (i > 0){
            const Mapping& prev_mapping = path.mapping(i - 1);
            // Mappings without a position are skipped for nodes above; skip
            // them here too so we never query an edge from a default node id.
            if (prev_mapping.has_position()){
                const int64_t prev_node_id = prev_mapping.position().node_id();
                if (my_xg->has_edge(prev_node_id, false, node_id, false)){
                    const int64_t edge_key = my_xg->edge_rank_as_entity(prev_node_id, false, node_id, false);
                    ret[edge_key - 1] = 1.0;
                }
            }
        }
    }
    return ret;
}
Exemple #3
0
    /**
     * Filter an alignment by how often each of its edits has been seen.
     *
     * Every edit is keyed by the node of its mapping (position key
     * "<node>_<offset>") and by its own shape (edit key
     * "<from_length>_<to_length>_<sequence>"). Each call increments the
     * counter for every edit it visits in the shared pos_to_edit_to_depth
     * table and compares the new count against min_depth. Exact-match edits
     * are only counted/checked when filter_matches is set.
     *
     * If an edit fails the filter, either return a new empty alignment
     * OR
     * return a new alignment identical to the old one EXCEPT where
     * the offending edit has been replaced by a match to the reference
     * (when remove_failing_edits is set). Only the first failing edit is
     * handled; the function returns as soon as one is found.
     *
     * @param aln the alignment to test; it is not modified.
     * @return aln if no edit is below min_depth (an empty Alignment when
     *         `inverse` is set); otherwise the empty/edited alignment
     *         described above (aln itself when `inverse` is set and
     *         remove_failing_edits is not).
     */
    Alignment Filter::depth_filter(Alignment& aln){
        // Bind by reference: the protobuf messages are only read here.
        const Path& path = aln.path();
        //TODO handle reversing mappings

        for (int i = 0; i < path.mapping_size(); i++){
            const Mapping& mapping = path.mapping(i);
            const int64_t start_node = mapping.position().node_id();
            // NOTE(review): the position key always uses offset 0; neither
            // the mapping's own offset nor the progress through its edits
            // is part of the key. Kept as-is to preserve the key format —
            // confirm whether that is intended.
            const int64_t curr_offset_in_graph = 0;

            stringstream pst;
            pst << start_node << "_" << curr_offset_in_graph;
            const string p_hash = pst.str();

            for (int j = 0; j < mapping.edit_size(); j++){
                const Edit& ee = mapping.edit(j);
                const bool is_match = ee.from_length() == ee.to_length() && ee.sequence().empty();
                if (is_match && !filter_matches){
                    continue;
                }

                stringstream est;
                est << ee.from_length() << "_" << ee.to_length() << "_" << ee.sequence();
                const string e_hash = est.str();

                // Increment and read the counter inside the same critical
                // section: reading the shared map outside of it would race
                // with other threads updating the table.
                bool below_min_depth = false;
#pragma omp critical(write)
                {
                    auto& depth = pos_to_edit_to_depth[p_hash][e_hash];
                    depth += 1;
                    below_min_depth = depth < min_depth;
                }

                if (below_min_depth){
                    if (!remove_failing_edits){
                        return inverse ? aln : Alignment();
                    }

                    // Replace the offending edit with a reference match of
                    // the same reference length.
                    Alignment edited_aln = Alignment(aln);
                    Edit* failing_edit = edited_aln.mutable_path()->mutable_mapping(i)->mutable_edit(j);
                    failing_edit->set_sequence("");
                    failing_edit->set_from_length(ee.from_length());
                    failing_edit->set_to_length(ee.from_length());
                    return edited_aln;
                }
            }
        }

        // Every mapping has been checked (or there were none): the read passes.
        return inverse ? Alignment() : aln;
    }