/// Render one alignment as a single tab-separated SAM record line.
///
/// @param alignment  read to emit; supplies the name, the flag (via sam_flag),
///                   mapping quality, sequence, base qualities and read group
/// @param refseq     reference sequence name (RNAME); empty is written as "*"
/// @param refpos     0-based position on refseq; written as refpos + 1
/// @param cigar      CIGAR string; only written when the alignment has a path
///                   with at least one mapping, otherwise "*" is written
/// @param mateseq    reference sequence name of the mate (RNEXT)
/// @param matepos    0-based position of the mate; written as matepos + 1
/// @param tlen       template length (TLEN)
/// @return the record terminated by "\n", with a trailing RG:Z: tag when the
///         alignment has a non-empty read group
string alignment_to_sam(const Alignment& alignment,
                        const string& refseq,
                        const int32_t refpos,
                        const string& cigar,
                        const string& mateseq,
                        const int32_t matepos,
                        const int32_t tlen) {
    const bool has_mappings = alignment.has_path() && alignment.path().mapping_size();

    // RNEXT: "*" when nothing is known about the mate, "=" when the mate is on
    // the same reference as this read. Emptiness is tested first so that a
    // missing mate never yields "=" (both names empty) or an empty column
    // (only mateseq empty), either of which breaks the record.
    const string rnext = mateseq.empty() ? "*" : (mateseq == refseq ? "=" : mateseq);

    stringstream sam;
    sam << (!alignment.name().empty() ? alignment.name() : "*") << "\t"
        << sam_flag(alignment) << "\t"
        << (refseq.empty() ? "*" : refseq) << "\t"
        << refpos + 1 << "\t" // positions are 1-based in SAM, 0 means unmapped
        << alignment.mapping_quality() << "\t"
        << (has_mappings ? cigar : "*") << "\t"
        << rnext << "\t"
        << matepos + 1 << "\t"
        << tlen << "\t"
        << (!alignment.sequence().empty() ? alignment.sequence() : "*") << "\t";

    // Base qualities are converted to their printable form one value at a
    // time; "*" is written when the read carries no qualities.
    if (!alignment.quality().empty()) {
        const string& quality = alignment.quality();
        for (const char q : quality) {
            sam << quality_short_to_char(q);
        }
    } else {
        sam << "*";
    }

    if (!alignment.read_group().empty()) sam << "\tRG:Z:" << alignment.read_group();
    sam << "\n";
    return sam.str();
}
/// Paired-end wrapper around the single-alignment depth_filter.
///
/// Both arguments are overwritten in place with the result of the
/// single-alignment filter, and the decision is then made on whether the
/// filtered mates still carry a name.
///
/// @param aln_first   first mate; replaced by its filtered version
/// @param aln_second  second mate; replaced by its filtered version
/// @return either the filtered pair or a pair of default-constructed
///         Alignments, selected by the name check and the `inverse` member
pair<Alignment, Alignment> Filter::depth_filter(Alignment& aln_first, Alignment& aln_second){
    aln_first = depth_filter(aln_first);
    aln_second = depth_filter(aln_second);

    // Both mates must be inspected; the second operand previously re-tested
    // aln_first, so aln_second never influenced the outcome.
    const bool both_named = !aln_first.name().empty() && !aln_second.name().empty();

    // NOTE(review): the branch polarity here is the opposite of the pair
    // overload of path_length_filter, which returns the empty pair when a name
    // is empty and `inverse` is false. Left as-is; confirm which is intended.
    if (both_named){
        return inverse ? make_pair(aln_first, aln_second) : make_pair(Alignment(), Alignment());
    }
    else{
        return inverse ? make_pair(Alignment(), Alignment()) : make_pair(aln_first, aln_second);
    }
}
/// Paired-end wrapper around the single-alignment path_length_filter.
///
/// Each mate is filtered into a local copy (the arguments themselves are not
/// reassigned). When `inverse` is false the filtered pair is returned only if
/// both filtered mates still have a name; when `inverse` is set the choice is
/// flipped. In every other case a pair of default-constructed Alignments is
/// returned.
std::pair<Alignment, Alignment> Filter::path_length_filter(Alignment& aln_first, Alignment& aln_second){
    Alignment filtered_first = path_length_filter(aln_first);
    Alignment filtered_second = path_length_filter(aln_second);

    // A mate with an empty name is taken to have been dropped by the filter.
    const bool any_unnamed = filtered_first.name().empty() || filtered_second.name().empty();

    // Keep the pair when exactly one of "a mate is unnamed" / "inverse" holds
    // in the same sense as the original two ternaries.
    const bool keep_pair = any_unnamed ? inverse : !inverse;
    if (keep_pair){
        return make_pair(filtered_first, filtered_second);
    }
    return make_pair(Alignment(), Alignment());
}
/// Pair-level name check: returns the pair unchanged when both alignments
/// have a non-empty name, otherwise a pair of default-constructed Alignments.
///
/// @param aln_first   first mate
/// @param aln_second  second mate
/// @return copies of (aln_first, aln_second), or two empty Alignments when
///         either name is empty
pair<Alignment, Alignment> percent_identity_filter(Alignment& aln_first, Alignment& aln_second){
    // Both mates must be inspected; the second operand previously re-tested
    // aln_first, so an unnamed second mate was never detected.
    if (aln_first.name().empty() || aln_second.name().empty()){
        return make_pair(Alignment(), Alignment());
    }
    else{
        return make_pair(aln_first, aln_second);
    }
}
/// Build a mutated copy of an alignment by passing every edit of every
/// mapping through mutate_edit.
///
/// @param aln          alignment whose path is walked
/// @param base_error   forwarded to mutate_edit
/// @param indel_error  forwarded to mutate_edit
/// @return `aln` itself when both rates are zero; otherwise a new alignment
///         holding the simplified mutated path, a sequence re-derived from
///         that path, and the original name
Alignment Sampler::mutate(const Alignment& aln,
                          double base_error,
                          double indel_error) {
    // With no errors requested there is nothing to change.
    if (base_error == 0 && indel_error == 0) {
        return aln;
    }

    string bases = "ATGC";
    uniform_real_distribution<double> rprob(0, 1);
    uniform_int_distribution<int> rbase(0, 3);

    Alignment mutated;
    for (size_t mapping_idx = 0; mapping_idx < aln.path().mapping_size(); ++mapping_idx) {
        const auto& source_mapping = aln.path().mapping(mapping_idx);

        // Each source mapping gets a counterpart that starts at the same position.
        Mapping* target_mapping = mutated.mutable_path()->add_mapping();
        *target_mapping->mutable_position() = source_mapping.position();

        // Every source edit may expand into several edits; append them in order.
        for (size_t edit_idx = 0; edit_idx < source_mapping.edit_size(); ++edit_idx) {
            const auto& source_edit = source_mapping.edit(edit_idx);
            auto replacement_edits = mutate_edit(source_edit,
                                                 make_pos_t(source_mapping.position()),
                                                 base_error,
                                                 indel_error,
                                                 bases,
                                                 rprob,
                                                 rbase);
            for (auto& replacement : replacement_edits) {
                *target_mapping->add_edit() = replacement;
            }
        }
    }

    // The read sequence is regenerated from the simplified, mutated path.
    mutated = simplify(mutated);
    mutated.set_sequence(alignment_seq(mutated));
    mutated.set_name(aln.name());
    return mutated;
}
/// Remove the first `drop` bases from an alignment.
///
/// The result carries only the name, the score, the trimmed sequence and,
/// when the input has a path, the second half of cut_path(path, drop).
///
/// @param aln   alignment to trim
/// @param drop  number of leading bases to remove; values larger than the
///              sequence length are treated as the sequence length
/// @return `aln` unchanged when drop is 0, otherwise the trimmed alignment
Alignment strip_from_start(const Alignment& aln, size_t drop) {
    if (!drop) return aln;

    // Clamp so that substr() cannot throw std::out_of_range when asked to drop
    // more bases than the read has; this reduces to the drop == length case.
    const size_t seq_len = aln.sequence().size();
    if (drop > seq_len) drop = seq_len;

    Alignment res;
    res.set_name(aln.name());
    res.set_score(aln.score());
    res.set_sequence(aln.sequence().substr(drop));
    if (!aln.has_path()) return res;

    *res.mutable_path() = cut_path(aln.path(), drop).second;
    assert(res.has_path());

    // Sanity check: the trimmed path must account for exactly the bases kept.
    if (alignment_to_length(res) != res.sequence().size()) {
        cerr << "failed!!! drop from start 轰" << endl;
        cerr << pb2json(res) << endl << endl;
        assert(false);
    }
    return res;
}
/// Remove the last `drop` bases from an alignment.
///
/// The result carries only the name, the score, the trimmed sequence and,
/// when the input has a path, the first half of cut_path(path, cut_at), where
/// cut_at is the number of bases kept.
///
/// @param aln   alignment to trim
/// @param drop  number of trailing bases to remove; values larger than the
///              sequence length are treated as the sequence length
/// @return `aln` unchanged when drop is 0, otherwise the trimmed alignment
Alignment strip_from_end(const Alignment& aln, size_t drop) {
    if (!drop) return aln;

    // Clamp before subtracting: size() - drop is unsigned, so dropping more
    // bases than the read has would wrap around to a huge cut point and
    // silently keep the entire sequence.
    const size_t seq_len = aln.sequence().size();
    if (drop > seq_len) drop = seq_len;
    const size_t cut_at = seq_len - drop;

    Alignment res;
    res.set_name(aln.name());
    res.set_score(aln.score());
    res.set_sequence(aln.sequence().substr(0, cut_at));
    if (!aln.has_path()) return res;

    *res.mutable_path() = cut_path(aln.path(), cut_at).first;
    assert(res.has_path());

    // Sanity check: the trimmed path must account for exactly the bases kept.
    if (alignment_to_length(res) != res.sequence().size()) {
        cerr << "failed!!! drop from end 轰" << endl;
        cerr << pb2json(res) << endl << endl;
        assert(false);
    }
    return res;
}