/// Render one alignment as a single, newline-terminated SAM record.
///
/// @param alignment  the read; supplies QNAME, FLAG (via sam_flag), MAPQ, SEQ,
///                   QUAL and the optional RG tag
/// @param refseq     reference sequence name (RNAME); "*" is written when empty
/// @param refpos     reference position; written as refpos + 1 because SAM
///                   positions are 1-based
/// @param cigar      CIGAR string; only written when the alignment has a path
///                   with at least one mapping, otherwise "*"
/// @param mateseq    mate reference name (RNEXT); "*" when empty, "=" when it
///                   equals refseq
/// @param matepos    mate position; written as matepos + 1
/// @param tlen       template length (TLEN), written verbatim
/// @return the tab-separated SAM line including the trailing "\n"
string alignment_to_sam(const Alignment& alignment,
                        const string& refseq,
                        const int32_t refpos,
                        const string& cigar,
                        const string& mateseq,
                        const int32_t matepos,
                        const int32_t tlen) {
    stringstream sam;

    sam << (!alignment.name().empty() ? alignment.name() : "*") << "\t"
        << sam_flag(alignment) << "\t"
        << (refseq.empty() ? "*" : refseq) << "\t"
        // positions are 1-based in SAM, 0 means unmapped
        << refpos + 1 << "\t"
        << alignment.mapping_quality() << "\t"
        << (alignment.has_path() && alignment.path().mapping_size() ? cigar : "*") << "\t"
        // An empty mate name must become "*": an empty field is not valid SAM,
        // and "=" would be misleading when there is no mate information.
        << (mateseq.empty() ? "*" : (mateseq == refseq ? "=" : mateseq)) << "\t"
        << matepos + 1 << "\t"
        << tlen << "\t"
        << (!alignment.sequence().empty() ? alignment.sequence() : "*") << "\t";

    // Qualities are converted one value at a time through
    // quality_short_to_char; a missing quality string is written as "*".
    if (!alignment.quality().empty()) {
        for (const char q : alignment.quality()) {
            sam << quality_short_to_char(q);
        }
    } else {
        sam << "*";
    }

    if (!alignment.read_group().empty()) {
        sam << "\tRG:Z:" << alignment.read_group();
    }
    sam << "\n";
    return sam.str();
}
/// Add the contribution of one alignment to the per-node pileups.
///
/// If the first mapping of the alignment is on the reverse strand, the
/// alignment is replaced IN PLACE by its reverse (see reverse_alignment), so
/// the caller's object is modified. Mappings whose node is not present in
/// the graph are skipped.
///
/// @param graph      graph used to look up nodes and their sequence lengths
/// @param alignment  alignment to process; may be overwritten as described
void Pileups::compute_from_alignment(VG& graph, Alignment& alignment) {
    // Only inspect mapping(0) when it exists: a path with no mappings is a
    // case the closing assert explicitly tolerates, so it must not be indexed.
    const bool starts_reversed = alignment.has_path()
        && alignment.path().mapping_size() > 0
        && alignment.path().mapping(0).position().is_reverse();

    if (starts_reversed) {
        const function<int64_t(int64_t)> node_length = [&graph](int64_t id) -> int64_t {
            return graph.get_node(id)->sequence().size();
        };
        alignment = reverse_alignment(alignment, node_length);
    }

    // Bind the path only after the possible reassignment above.
    const Path& path = alignment.path();
    int64_t read_offset = 0;

    for (int i = 0; i < path.mapping_size(); ++i) {
        const Mapping& mapping = path.mapping(i);
        if (!graph.has_node(mapping.position().node_id())) {
            continue;
        }
        const Node* node = graph.get_node(mapping.position().node_id());
        NodePileup* pileup = get_create(node->id());
        int64_t node_offset = mapping.position().offset();

        for (int j = 0; j < mapping.edit_size(); ++j) {
            const Edit& edit = mapping.edit(j);
            // process all pileups in edit.
            // update the offsets as we go
            compute_from_edit(*pileup, node_offset, read_offset,
                              *node, alignment, mapping, edit);
        }
    }

    // Sanity check: unless there is no sequence or no mappings, the edits
    // must have consumed the read exactly.
    assert(alignment.sequence().empty()
           || alignment.path().mapping_size() == 0
           || read_offset == alignment.sequence().length());
}
/// Return a copy of `aln` with the first `drop` bases removed.
///
/// With drop == 0 the input is returned unchanged. Otherwise a fresh
/// Alignment is built that carries over only the name, the score, the
/// shortened sequence and (when the input has one) the path cut at `drop`,
/// keeping the second half of the pair returned by cut_path.
///
/// If the trimmed path and the trimmed sequence disagree in length, the
/// alignment is dumped to stderr and the function asserts.
Alignment strip_from_start(const Alignment& aln, size_t drop) {
    if (drop == 0) {
        return aln;
    }

    Alignment stripped;
    stripped.set_name(aln.name());
    stripped.set_score(aln.score());
    stripped.set_sequence(aln.sequence().substr(drop));

    if (aln.has_path()) {
        *stripped.mutable_path() = cut_path(aln.path(), drop).second;
        assert(stripped.has_path());

        const bool lengths_agree =
            alignment_to_length(stripped) == stripped.sequence().size();
        if (!lengths_agree) {
            cerr << "failed!!! drop from start 轰" << endl;
            cerr << pb2json(stripped) << endl << endl;
            assert(false);
        }
    }

    return stripped;
}
/// Return a copy of `aln` with the last `drop` bases removed.
///
/// With drop == 0 the input is returned unchanged. Otherwise a fresh
/// Alignment is built that carries over only the name, the score, the
/// shortened sequence and (when the input has one) the path cut at the new
/// end, keeping the first half of the pair returned by cut_path.
///
/// A `drop` larger than the sequence is treated as dropping the whole
/// sequence. Without this clamp the unsigned subtraction would wrap around
/// and the sequence would silently be left untouched.
///
/// If the trimmed path and the trimmed sequence disagree in length, the
/// alignment is dumped to stderr and the function asserts.
Alignment strip_from_end(const Alignment& aln, size_t drop) {
    if (!drop) return aln;

    Alignment res;
    res.set_name(aln.name());
    res.set_score(aln.score());

    // Clamp so that size() - drop can never underflow.
    const size_t seq_len = aln.sequence().size();
    const size_t cut_at = drop < seq_len ? seq_len - drop : 0;
    res.set_sequence(aln.sequence().substr(0, cut_at));

    if (!aln.has_path()) return res;

    *res.mutable_path() = cut_path(aln.path(), cut_at).first;
    assert(res.has_path());
    if (alignment_to_length(res) != res.sequence().size()) {
        cerr << "failed!!! drop from end 轰" << endl;
        cerr << pb2json(res) << endl << endl;
        assert(false);
    }
    return res;
}
// act like the path this is against is the reference // and generate an equivalent cigar string cigar_against_path(const Alignment& alignment) { vector<pair<int, char> > cigar; if (!alignment.has_path()) return ""; const Path& path = alignment.path(); int l = 0; for (const auto& mapping : path.mapping()) { mapping_cigar(mapping, cigar); } return cigar_string(cigar); }
/// Build the reverse-strand version of an alignment.
///
/// The result is a copy of `aln` whose sequence is reverse-complemented,
/// whose per-base quality string (if any) is reversed so that each quality
/// still belongs to the same base, and whose path (if any) is replaced by
/// reverse_path(aln.path(), node_length).
///
/// @param aln          alignment to reverse; not modified
/// @param node_length  callback giving the length of a node by id; forwarded
///                     to reverse_path
/// @return the reversed copy
Alignment reverse_alignment(const Alignment& aln, const function<int64_t(int64_t)>& node_length) {
    // We're going to reverse the alignment and all its mappings.
    // TODO: should we/can we do this in place?
    Alignment reversed = aln;
    reversed.set_sequence(reverse_complement(aln.sequence()));

    // Qualities run parallel to the sequence, so they have to be flipped
    // along with it; otherwise base i would be paired with the quality of
    // base (n - 1 - i). Left untouched when there is no quality string.
    if (!aln.quality().empty()) {
        const string& quality = aln.quality();
        reversed.set_quality(string(quality.rbegin(), quality.rend()));
    }

    if (aln.has_path()) {
        // Now invert the order of the mappings, and for each mapping, flip the
        // is_reverse flag. The edits within mappings also get put in reverse
        // order, get their positions corrected, and get their sequences
        // reverse complemented.
        *reversed.mutable_path() = reverse_path(aln.path(), node_length);
    }

    return reversed;
}
int32_t sam_flag(const Alignment& alignment) { int16_t flag = 0; if (alignment.score() == 0) { // unmapped flag |= BAM_FUNMAP; } else { // correctly aligned flag |= BAM_FPROPER_PAIR; } // HACKZ -- you can't determine orientation from a single part of the mapping // unless the graph is a DAG if (alignment.has_path() && alignment.path().mapping(0).position().is_reverse()) { flag |= BAM_FREVERSE; } if (alignment.is_secondary()) { flag |= BAM_FSECONDARY; } return flag; }