/**
 * Filter reads whose identity to the reference is below the cutoff.
 *
 * Identity is computed as
 *     (bases covered by exact-match edits) / (length of the read sequence).
 * An edit counts as an exact match when its from_length equals its to_length
 * and it carries no replacement sequence.
 * I.E. if a read matches the reference along 80% of its length, and your
 * cutoff is 90% PCTID, throw it out.
 *
 * NOTE(review): the value compared against min_percent_identity is a ratio
 * (match length / total length); presumably min_percent_identity is expressed
 * on the same scale -- confirm against where it is set.
 *
 * @param aln the alignment to test; it is not modified.
 * @return aln when it passes the filter and an empty Alignment when it does
 *         not; the two outcomes are swapped when `inverse` is set.
 */
Alignment Filter::percent_identity_filter(Alignment& aln){
    const int64_t aln_total_len = aln.sequence().size();
    int64_t aln_match_len = 0;

    // Bind by const reference: the messages are only read, so copying the
    // path and every mapping/edit would be wasted work.
    const Path& path = aln.path();
    //TODO handle reversing mappings
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        for (int j = 0; j < mapping.edit_size(); j++){
            const Edit& ee = mapping.edit(j);
            if (ee.from_length() == ee.to_length() && ee.sequence().empty()){
                aln_match_len += ee.to_length();
            }
        }
    }

    // A read with no sequence has no matching bases. Use 0.0 instead of
    // letting 0/0 produce NaN, which compares false against any cutoff and
    // would let the empty read slip through the filter.
    const double pct_id = (aln_total_len == 0)
        ? 0.0
        : static_cast<double>(aln_match_len) / static_cast<double>(aln_total_len);

    if (pct_id < min_percent_identity){
        return inverse ? aln : Alignment();
    }
    return inverse ? Alignment() : aln;
}
/**
 * Build a per-entity identity vector for an alignment.
 *
 * The result has one slot per graph entity (node_count + edge_count of the
 * xg index), all initialised to 0.0. For every mapping that has a position,
 * the slot of the mapped node is set to the fraction of reference bases
 * covered by exact-match edits (from_length == to_length, no replacement
 * sequence). When the graph has a forward edge from the previous mapping's
 * node to this one, the slot of that edge is set to 1.0.
 *
 * Slots are addressed as (entity rank - 1).
 * NOTE(review): this assumes node_rank_as_entity / edge_rank_as_entity return
 * 1-based ranks for every id seen here -- confirm; a rank of 0 would index
 * out of bounds.
 *
 * @param a the alignment to vectorize.
 * @return the identity vector described above.
 */
vector<double> Vectorizer::alignment_to_identity_hot(Alignment a){
    const int64_t entity_size = my_xg->node_count + my_xg->edge_count;
    vector<double> ret(entity_size, 0.0);

    // Read-only access: bind by const reference instead of copying messages.
    const Path& path = a.path();
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        if (!mapping.has_position()){
            continue;
        }
        const int64_t node_id = mapping.position().node_id();
        const int64_t key = my_xg->node_rank_as_entity(node_id);

        // Calculate % identity by walking the edits and counting matches.
        double match_len = 0.0;
        double total_len = 0.0;
        for (int j = 0; j < mapping.edit_size(); j++){
            const Edit& e = mapping.edit(j);
            total_len += e.from_length();
            if (e.from_length() == e.to_length() && e.sequence().empty()){
                match_len += static_cast<double>(e.to_length());
            }
            // TODO if we map but don't match exactly (equal lengths with a
            // replacement sequence), add half the length to match_len:
            // match_len += 0.5 * e.to_length();
        }
        // match_len only grows together with total_len, so a zero total means
        // there is nothing to divide; report 0.0 rather than 0/0.
        ret[key - 1] = (total_len == 0.0) ? 0.0 : (match_len / total_len);

        if (i > 0){
            const Mapping& prev_mapping = path.mapping(i - 1);
            // Only look for an edge when the previous mapping actually sits on
            // a node; a position-less mapping would otherwise contribute the
            // default node id to the edge lookup.
            if (prev_mapping.has_position()){
                const int64_t prev_node_id = prev_mapping.position().node_id();
                if (my_xg->has_edge(prev_node_id, false, node_id, false)){
                    const int64_t edge_key = my_xg->edge_rank_as_entity(prev_node_id, false, node_id, false);
                    ret[edge_key - 1] = 1.0;
                }
            }
        }
    }
    return ret;
}
/**
 * Filter an alignment by how many times each of its edits has been seen.
 *
 * Every examined edit bumps a counter in pos_to_edit_to_depth, keyed first by
 * the mapping's node ("<node_id>_0") and then by the edit itself
 * ("<from_length>_<to_length>_<sequence>"). Exact-match edits
 * (from_length == to_length, no replacement sequence) are skipped unless
 * filter_matches is set.
 *
 * If an edit fails the filter (its counter is below min_depth), either return
 * a new empty alignment
 * OR
 * (when remove_failing_edits is set) return a new alignment identical to the
 * old one EXCEPT where the offending edit has been replaced by a match to the
 * reference. Processing stops at the first failing edit in both cases.
 *
 * @param aln the alignment to test; it is not modified.
 * @return - first failing edit, remove_failing_edits unset: an empty
 *           Alignment (or aln when `inverse` is set);
 *         - first failing edit, remove_failing_edits set: a copy of aln with
 *           that edit turned into a reference match (`inverse` is not
 *           consulted on this path);
 *         - no failing edit in any mapping: aln (or an empty Alignment when
 *           `inverse` is set).
 */
Alignment Filter::depth_filter(Alignment& aln){
    // Read-only walk over the path; bind by const reference instead of copying.
    const Path& path = aln.path();
    //TODO handle reversing mappings
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        const int64_t start_node = mapping.position().node_id();

        // Offsets within the node are not tracked yet, so the offset part of
        // the key is always 0 and all edits of a mapping share one key.
        const int64_t curr_offset_in_graph = 0;
        stringstream pst;
        pst << start_node << "_" << curr_offset_in_graph;
        const string p_hash = pst.str();

        for (int j = 0; j < mapping.edit_size(); j++){
            const Edit& ee = mapping.edit(j);

            const bool is_match = ee.from_length() == ee.to_length() && ee.sequence().empty();
            if (is_match && !filter_matches){
                continue;
            }

            stringstream est;
            est << ee.from_length() << "_" << ee.to_length() << "_" << ee.sequence();
            const string e_hash = est.str();

            // Update the counter and read the new value back inside the same
            // critical section, so the comparison below never touches the
            // map while another thread may be writing to it.
            // NOTE(review): assumes the counters are integral -- confirm
            // against the declaration of pos_to_edit_to_depth.
            int64_t depth = 0;
            #pragma omp critical(write)
            {
                auto& count = pos_to_edit_to_depth[p_hash][e_hash];
                count += 1;
                depth = count;
            }

            if (depth < min_depth){
                if (!remove_failing_edits){
                    return inverse ? aln : Alignment();
                }
                // Rewrite the offending edit as a plain reference match:
                // no replacement sequence, to_length equal to from_length.
                Alignment edited_aln(aln);
                Edit* failing_edit = edited_aln.mutable_path()->mutable_mapping(i)->mutable_edit(j);
                failing_edit->set_sequence("");
                failing_edit->set_from_length(ee.from_length());
                failing_edit->set_to_length(ee.from_length());
                return edited_aln;
            }
        }
    }

    // Reached only after every mapping has been checked. Keeping this outside
    // the loop also guarantees a return value for a path with no mappings.
    return inverse ? Alignment() : aln;
}