void DebugOutput::process_one_alignment(const AlignmentData& mate_1, const AlignmentData& mate_2) { DbgAlignment al; DbgInfo info; uint32 mapq; const AlignmentData& anchor = (mate_1.best->mate() ? mate_2 : mate_1); const AlignmentData& opposite_mate = (mate_1.best->mate() ? mate_1 : mate_2); // compute the alignment's mapping quality // we always compute the mapq using the anchor, so this is only done once per pair if (mate_1.best->is_aligned()) { mapq = mapq_evaluator->compute_mapq(anchor, opposite_mate); } else { mapq = 0; } // process the first mate process_one_mate(al, info, mate_1, mate_2, mapq); output_alignment(fp, al, info); if (alignment_type == PAIRED_END) { // process the second mate process_one_mate(al, info, mate_2, mate_1, mapq); output_alignment(fp_opposite_mate, al, info); } // track per-alignment statistics iostats.track_alignment_statistics(anchor, opposite_mate, mapq); }
// Build the BAM record for one alignment and hand it to output_alignment().
//
// alignment: the read/alignment being written.
// mate:      the opposite mate; only consulted when alignment_type is PAIRED_END.
//
// Returns 0 when the record is written as unmapped (the read is not aligned,
// its mapq is below mapq_filter, or the alignment extends past the end of its
// reference sequence). Otherwise returns the alignment's mapping quality.
// Note that when the generated cigar does not cover the whole read an error is
// logged and the mapq is returned WITHOUT writing any record.
uint32 BamOutput::process_one_alignment(AlignmentData& alignment, AlignmentData& mate)
{
    // BAM alignment header
    struct BAM_alignment alnh;
    // data block with actual alignment info
    struct BAM_alignment_data_block alnd;
    uint8 mapq;

    // xxxnsubtil: this is probably not needed
    memset(&alnh, 0, sizeof(alnh));
    memset(&alnd, 0, sizeof(alnd));

    const uint32 ref_cigar_len = reference_cigar_length(alignment.cigar, alignment.cigar_len);

    // find the reference sequence containing cigar_pos: the last entry of
    // bnt.sequence_index that is <= cigar_pos
    const uint32 seq_index = uint32(std::upper_bound(
        bnt.sequence_index,
        bnt.sequence_index + bnt.n_seqs,
        alignment.cigar_pos ) - bnt.sequence_index) - 1u;

    // fill out read name and length (the length includes the terminating NUL)
    alnd.name = alignment.read_name;
    alnh.bin_mq_nl = (uint8)(strlen(alnd.name) + 1);

    // fill out read data: two bases per output byte, first base in the high nibble
    // (PackedStream is not used here to avoid doing a read-modify-write on every BP)
    {
        for(uint32 i = 0; i < alignment.read_len; i += 2)
        {
            uint8 out_bp;
            uint8 s;

            if (alignment.aln->m_rc)
            {
                // reverse-complemented alignment: complement each base,
                // walking read_data front to back
                nvbio::complement_functor<4> complement;

                s = complement(alignment.read_data[i]);
                s = encode_bp(s);
                out_bp = s << 4;

                if (i + 1 < alignment.read_len)
                {
                    s = complement(alignment.read_data[(i + 1)]);
                    s = encode_bp(s);
                    out_bp |= s;
                }
            } else {
                // forward alignment: walk read_data back to front, no complement
                // NOTE(review): presumably read_data is stored reversed — confirm
                s = alignment.read_data[alignment.read_len - i - 1];
                s = encode_bp(s);
                out_bp = s << 4;

                if (i + 1 < alignment.read_len)
                {
                    s = alignment.read_data[alignment.read_len - (i + 1) - 1];
                    s = encode_bp(s);
                    out_bp |= s;
                }
            }

            alnd.seq[i / 2] = out_bp;
        }

        alnh.l_seq = alignment.read_len;
    }

    // fill out quality data, using the same traversal direction as the bases
    for(uint32 i = 0; i < alignment.read_len; i++)
    {
        char q;

        if (alignment.aln->m_rc)
            q = alignment.qual[i];
        else
            q = alignment.qual[alignment.read_len - i - 1];

        alnd.qual[i] = q;
    }

    // compute mapping quality
    mapq = alignment.mapq;

    // check if we're mapped
    if (alignment.aln->is_aligned() == false || mapq < mapq_filter)
    {
        alnh.refID = -1;
        alnh.pos = -1;
        alnh.flag_nc = BAM_FLAGS_UNMAPPED;
        alnh.next_refID = -1;
        alnh.next_pos = -1;
        // mark the md string as empty
        alnd.md_string[0] = '\0';

        // unaligned reads don't need anything else; output and return
        output_alignment(alnh, alnd);
        return 0;
    }

    // compute alignment flags
    alnh.flag_nc = (alignment.aln->mate() ? BAM_FLAGS_READ_2 : BAM_FLAGS_READ_1);

    if (alignment.aln->m_rc)
        alnh.flag_nc |= BAM_FLAGS_REVERSE;

    if (alignment_type == PAIRED_END)
    {
        alnh.flag_nc |= BAM_FLAGS_PAIRED;

        if (mate.aln->is_concordant())
            alnh.flag_nc |= BAM_FLAGS_PROPER_PAIR;

        if (!mate.aln->is_aligned())
            alnh.flag_nc |= BAM_FLAGS_MATE_UNMAPPED;

        if (mate.aln->is_rc())
            alnh.flag_nc |= BAM_FLAGS_MATE_REVERSE;
    }

    if (alignment.cigar_pos + ref_cigar_len > bnt.sequence_index[ seq_index+1 ])
    {
        // flag UNMAP as this alignment bridges two adjacent reference sequences
        // xxxnsubtil: we still output the rest of the alignment data, does that make sense?
        alnh.flag_nc |= BAM_FLAGS_UNMAPPED;

        // make this look like a real unmapped alignment
        alnh.refID = -1;
        alnh.pos = -1;
        alnh.next_refID = -1;
        alnh.next_pos = -1;
        alnd.md_string[0] = '\0';

        output_alignment(alnh, alnd);
        return 0;
    }

    // fill out alignment reference ID and position (relative to the start of
    // the containing reference sequence)
    alnh.refID = seq_index;
    alnh.pos = uint32(alignment.cigar_pos - bnt.sequence_index[ seq_index ]);

    // write out mapq
    alnh.bin_mq_nl |= (mapq << 8);
    // BAM alignment bin is always 0
    // xxxnsubtil: is the bin useful?

    // fill out the cigar string...
    uint32 computed_cigar_len = generate_cigar(alnh, alnd, alignment);
    // ... and make sure it makes (some) sense
    if (computed_cigar_len != alignment.read_len)
    {
        log_error(stderr, "BAM output : cigar length doesn't match read %u (%u != %u)\n",
                  alignment.read_id /* xxxnsubtil: global_read_id */,
                  computed_cigar_len, alignment.read_len);
        return mapq;
    }

    if (alignment_type == PAIRED_END)
    {
        if (mate.aln->is_aligned())
        {
            const uint32 o_ref_cigar_len = reference_cigar_length(mate.cigar, mate.cigar_len);

            // setup alignment information for the opposite mate
            const uint32 o_seq_index = uint32(std::upper_bound(
                bnt.sequence_index,
                bnt.sequence_index + bnt.n_seqs,
                mate.cigar_pos ) - bnt.sequence_index) - 1u;

            // next_refID is the absolute reference index of the mate, exactly
            // like refID above (and like the unmapped-mate branch below, which
            // stores alnh.refID). Storing the difference of the two indices
            // would report every same-reference mate as being on reference 0.
            alnh.next_refID = o_seq_index;

            // next_pos here is equivalent to SAM's PNEXT,
            // but it's zero-based in BAM and one-based in SAM
            alnh.next_pos = int32( mate.cigar_pos - bnt.sequence_index[ o_seq_index ] );

            if (o_seq_index != seq_index)
                alnh.tlen = 0;
            else
            {
                // span from the leftmost start to the rightmost end of the two
                // mates, negated when the mate starts first
                alnh.tlen = nvbio::max(mate.cigar_pos + o_ref_cigar_len,
                                       alignment.cigar_pos + ref_cigar_len) -
                            nvbio::min(mate.cigar_pos, alignment.cigar_pos);

                if (mate.cigar_pos < alignment.cigar_pos)
                    alnh.tlen = -alnh.tlen;
            }
        } else {
            // other mate is unmapped
            // xxxnsubtil: this follows the same convention that was documented in the old code for SAM,
            // except that BAM does not have an encoding for '=' here
            // it's somewhat unclear whether this is correct
            alnh.next_refID = alnh.refID;
            alnh.next_pos = int32( alignment.cigar_pos - bnt.sequence_index[ seq_index ] );
            // xxx: check whether this is really correct
            alnh.tlen = 0;
        }
    } else {
        // single-end: there is no next segment
        alnh.next_refID = -1;
        alnh.next_pos = -1;
        alnh.tlen = 0;
    }

    // fill out tag data
    alnd.ed = alignment.aln->ed();
    alnd.score = alignment.aln->score();

    alnd.second_score_valid = false; // TODO!

    generate_md_string(alnh, alnd, alignment);

    // write out the alignment
    output_alignment(alnh, alnd);

    return mapq;
}