示例#1
0
void DebugOutput::process_one_alignment(const AlignmentData& mate_1, const AlignmentData& mate_2)
{
    DbgAlignment al;
    DbgInfo info;
    uint32 mapq;

    const AlignmentData& anchor = (mate_1.best->mate() ? mate_2 : mate_1);
    const AlignmentData& opposite_mate = (mate_1.best->mate() ? mate_1 : mate_2);

    // compute the alignment's mapping quality
    // we always compute the mapq using the anchor, so this is only done once per pair
    if (mate_1.best->is_aligned())
    {
        mapq = mapq_evaluator->compute_mapq(anchor, opposite_mate);
    } else {
        mapq = 0;
    }

    // process the first mate
    process_one_mate(al, info, mate_1, mate_2, mapq);
    output_alignment(fp, al, info);

    if (alignment_type == PAIRED_END)
    {
        // process the second mate
        process_one_mate(al, info, mate_2, mate_1, mapq);
        output_alignment(fp_opposite_mate, al, info);
    }

    // track per-alignment statistics
    iostats.track_alignment_statistics(anchor, opposite_mate, mapq);
}
示例#2
0
// Encode one alignment as a BAM record and pass it to output_alignment().
//
// alignment : the read being written (name, bases, qualities, cigar, mapq)
// mate      : the opposite mate; only consulted when alignment_type == PAIRED_END
//
// Returns 0 when the read is written as unmapped (not aligned, mapq below
// mapq_filter, or the alignment spans two adjacent reference sequences).
// Otherwise returns the alignment's mapq. Note that when the generated cigar
// does not cover the whole read, an error is logged, NO record is written,
// and mapq is still returned.
uint32 BamOutput::process_one_alignment(AlignmentData& alignment, AlignmentData& mate)
{
    // BAM alignment header
    struct BAM_alignment alnh;
    // data block with actual alignment info
    struct BAM_alignment_data_block alnd;

    uint8 mapq;

    // xxxnsubtil: this is probably not needed
    memset(&alnh, 0, sizeof(alnh));
    memset(&alnd, 0, sizeof(alnd));

    // number of reference bases covered by this alignment's cigar
    const uint32 ref_cigar_len = reference_cigar_length(alignment.cigar, alignment.cigar_len);

    // setup alignment information:
    // locate the reference sequence containing cigar_pos, i.e. the last entry of
    // bnt.sequence_index that is <= cigar_pos
    const uint32 seq_index = uint32(std::upper_bound(
        bnt.sequence_index,
        bnt.sequence_index + bnt.n_seqs,
        alignment.cigar_pos ) - bnt.sequence_index) - 1u;

    // fill out read name and length
    // (the low byte of bin_mq_nl holds the name length including the terminating NUL)
    alnd.name = alignment.read_name;
    alnh.bin_mq_nl = (uint8)(strlen(alnd.name) + 1);

    // fill out read data
    // (PackedStream is not used here to avoid doing a read-modify-write on every BP)
    {
        // two bases are packed per output byte: the first in the high nibble,
        // the second (if any) in the low nibble
        for(uint32 i = 0; i < alignment.read_len; i += 2)
        {
            uint8 out_bp;
            uint8 s;

            if (alignment.aln->m_rc)
            {
                // reverse-complemented alignment: bases are taken in index order
                // and complemented
                nvbio::complement_functor<4> complement;

                s = complement(alignment.read_data[i]);
                s = encode_bp(s);
                out_bp = s << 4;

                if (i + 1 < alignment.read_len)
                {
                    s = complement(alignment.read_data[(i + 1)]);
                    s = encode_bp(s);
                    out_bp |= s;
                }
            } else {
                // forward alignment: bases are taken from the end of read_data backwards
                // NOTE(review): presumably read_data is stored reversed - confirm
                s = alignment.read_data[alignment.read_len - i - 1];
                s = encode_bp(s);
                out_bp = s << 4;

                if (i + 1 < alignment.read_len)
                {
                    s = alignment.read_data[alignment.read_len - (i + 1) - 1];
                    s = encode_bp(s);
                    out_bp |= s;
                }
            }

            alnd.seq[i / 2] = out_bp;
        }

        alnh.l_seq = alignment.read_len;
    }

    // fill out quality data, using the same index order as the bases above
    for(uint32 i = 0; i < alignment.read_len; i++)
    {
        char q;

        if (alignment.aln->m_rc)
            q = alignment.qual[i];
        else
            q = alignment.qual[alignment.read_len - i - 1];

        alnd.qual[i] = q;
    }

    // compute mapping quality
    mapq = alignment.mapq;

    // check if we're mapped
    // (alignments below the mapq filter are written as unmapped as well)
    if (alignment.aln->is_aligned() == false || mapq < mapq_filter)
    {
        alnh.refID = -1;
        alnh.pos = -1;
        alnh.flag_nc = BAM_FLAGS_UNMAPPED;
        alnh.next_refID = -1;
        alnh.next_pos = -1;
        // mark the md string as empty
        alnd.md_string[0] = '\0';

        // unaligned reads don't need anything else; output and return
        output_alignment(alnh, alnd);
        return 0;
    }

    // compute alignment flags
    alnh.flag_nc = (alignment.aln->mate() ? BAM_FLAGS_READ_2 : BAM_FLAGS_READ_1);
    if (alignment.aln->m_rc)
        alnh.flag_nc |= BAM_FLAGS_REVERSE;

    if (alignment_type == PAIRED_END)
    {
        alnh.flag_nc |= BAM_FLAGS_PAIRED;

        if (mate.aln->is_concordant())
            alnh.flag_nc |= BAM_FLAGS_PROPER_PAIR;

        if (!mate.aln->is_aligned())
            alnh.flag_nc |= BAM_FLAGS_MATE_UNMAPPED;

        if (mate.aln->is_rc())
            alnh.flag_nc |= BAM_FLAGS_MATE_REVERSE;
    }

    if (alignment.cigar_pos + ref_cigar_len > bnt.sequence_index[ seq_index+1 ])
    {
        // flag UNMAP as this alignment bridges two adjacent reference sequences
        // xxxnsubtil: we still output the rest of the alignment data, does that make sense?
        alnh.flag_nc |= BAM_FLAGS_UNMAPPED;

        // make this look like a real unmapped alignment
        alnh.refID = -1;
        alnh.pos = -1;
        alnh.next_refID = -1;
        alnh.next_pos = -1;
        alnd.md_string[0] = '\0';

        output_alignment(alnh, alnd);
        return 0;
    }

    // fill out alignment reference ID and position
    // (the position is relative to the start of the containing reference sequence)
    alnh.refID = seq_index;
    alnh.pos = uint32(alignment.cigar_pos - bnt.sequence_index[ seq_index ]);

    // write out mapq
    alnh.bin_mq_nl |= (mapq << 8);
    // BAM alignment bin is always 0
    // xxxnsubtil: is the bin useful?

    // fill out the cigar string...
    uint32 computed_cigar_len = generate_cigar(alnh, alnd, alignment);
    // ... and make sure it makes (some) sense
    if (computed_cigar_len != alignment.read_len)
    {
        log_error(stderr, "BAM output : cigar length doesn't match read %u (%u != %u)\n",
                  alignment.read_id /* xxxnsubtil: global_read_id */,
                  computed_cigar_len, alignment.read_len);
        // the record is dropped: nothing is written for this read
        return mapq;
    }

    if (alignment_type == PAIRED_END)
    {
        if (mate.aln->is_aligned())
        {
            const uint32 o_ref_cigar_len = reference_cigar_length(mate.cigar, mate.cigar_len);

            // setup alignment information for the opposite mate
            const uint32 o_seq_index = uint32(std::upper_bound(
                bnt.sequence_index,
                bnt.sequence_index + bnt.n_seqs,
                mate.cigar_pos ) - bnt.sequence_index) - 1u;

            // next_refID is the mate's absolute reference ID (same encoding as refID),
            // NOT an offset relative to this read's reference: BAM has no '=' shorthand
            alnh.next_refID = o_seq_index;
            // next_pos here is equivalent to SAM's PNEXT,
            // but it's zero-based in BAM and one-based in SAM
            alnh.next_pos = int32( mate.cigar_pos - bnt.sequence_index[ o_seq_index ] );

            if (o_seq_index != seq_index)
                alnh.tlen = 0;
            else
            {
                // template length: distance from the leftmost start to the
                // rightmost end of the two mates...
                alnh.tlen = nvbio::max(mate.cigar_pos + o_ref_cigar_len,
                                       alignment.cigar_pos + ref_cigar_len) -
                            nvbio::min(mate.cigar_pos, alignment.cigar_pos);

                // ...negated when the mate starts before this read
                if (mate.cigar_pos < alignment.cigar_pos)
                    alnh.tlen = -alnh.tlen;
            }
        }
        else
        {
            // other mate is unmapped
            // xxxnsubtil: this follows the same convention that was documented in the old code for SAM,
            // except that BAM does not have an encoding for '=' here
            // it's somewhat unclear whether this is correct
            alnh.next_refID = alnh.refID;
            alnh.next_pos = int32( alignment.cigar_pos - bnt.sequence_index[ seq_index ] );
            // xxx: check whether this is really correct
            alnh.tlen = 0;
        }
    } else {
        // single-end: there is no mate to point at
        alnh.next_refID = -1;
        alnh.next_pos = -1;
        alnh.tlen = 0;
    }

    // fill out tag data
    alnd.ed = alignment.aln->ed();
    alnd.score = alignment.aln->score();

    alnd.second_score_valid = false; // TODO!

    generate_md_string(alnh, alnd, alignment);

    // write out the alignment
    output_alignment(alnh, alnd);

    return mapq;
}