Exemplo n.º 1
0
/**
 * \brief Produces a gap-free CIGAR (optional soft clip, optional ALIGN, optional soft clip)
 *        for the fragment and hands it to updateFragmentCigar.
 *
 * The fragment must not be aligned on entry (asserted). Its clipping is reset, then the
 * strand sequence range is narrowed in turn by the adapter clipper, clipReadMasking and
 * clipReference. Whatever is left in the middle becomes a single ALIGN operation; bases cut
 * off at either side become SOFT_CLIP operations. All operations are appended to cigarBuffer.
 *
 * \param fragmentMetadata  fragment being aligned; its clipping is reset and it is marked
 *                          unaligned when updateFragmentCigar returns 0
 * \param cigarBuffer       buffer that receives the new CIGAR operations
 * \param readMetadata      forwarded to updateFragmentCigar
 * \param adapterClipper    clipper applied first to the sequence range
 * \param contigList        contigs; indexed with fragmentMetadata.contigId
 * \param contigAnnotations forwarded to updateFragmentCigar
 *
 * \return the value returned by updateFragmentCigar
 */
unsigned UngappedAligner::alignUngapped(
    FragmentMetadata &fragmentMetadata,
    Cigar &cigarBuffer,
    const flowcell::ReadMetadata &readMetadata,
    const matchSelector::FragmentSequencingAdapterClipper &adapterClipper,
    const reference::ContigList &contigList,
    const isaac::reference::ContigAnnotations &contigAnnotations) const
{
    typedef std::vector<char>::const_iterator BaseIterator;

    // Index at which this fragment's operations begin inside the shared buffer.
    const unsigned firstOperationIndex = cigarBuffer.size();

    // The alignment itself is intentionally NOT reset here: doing so would discard the
    // seed-based anchors. Only the clipping information is cleared.
    ISAAC_ASSERT_MSG(!fragmentMetadata.isAligned(), "alignUngapped is expected to be performend on a clean fragment");
    fragmentMetadata.resetClipping();

    const reference::Contig &contig = contigList[fragmentMetadata.contigId];
    const Read &read = fragmentMetadata.getRead();
    const std::vector<char> &bases = read.getStrandSequence(fragmentMetadata.reverse);

    // [alignedBegin, alignedEnd) starts as the whole strand sequence and is shrunk by each clipper.
    BaseIterator alignedBegin = bases.begin();
    BaseIterator alignedEnd = bases.end();

    adapterClipper.clip(contig, fragmentMetadata, alignedBegin, alignedEnd);
    clipReadMasking(read, fragmentMetadata, alignedBegin, alignedEnd);
    clipReference(contig.size(), fragmentMetadata, alignedBegin, alignedEnd);

    // Bases removed in front of the remaining range.
    const unsigned leadingClip = alignedBegin - bases.begin();
    if (0 != leadingClip)
    {
        cigarBuffer.addOperation(leadingClip, Cigar::SOFT_CLIP);
    }

    // Bases that survived all clipping.
    const unsigned alignedBases = alignedEnd - alignedBegin;
    if (0 != alignedBases)
    {
        cigarBuffer.addOperation(alignedBases, Cigar::ALIGN);
    }

    // Bases removed behind the remaining range.
    const unsigned trailingClip = bases.end() - alignedEnd;
    if (0 != trailingClip)
    {
        cigarBuffer.addOperation(trailingClip, Cigar::SOFT_CLIP);
    }

    const unsigned result = updateFragmentCigar(
        readMetadata, contigList, contigAnnotations, fragmentMetadata,
        fragmentMetadata.reverse, fragmentMetadata.contigId, fragmentMetadata.position,
        cigarBuffer, firstOperationIndex);

    if (0 == result)
    {
        fragmentMetadata.setUnaligned();
    }

    return result;
}
/**
 * \brief Soft-clips mismatching bases at the end of the fragment's CIGAR.
 *
 * Looks at the last CIGAR component of the fragment. An existing trailing SOFT_CLIP is
 * skipped (and later merged with the new clipping). If the component in front of it is an
 * ALIGN, the read and the reference are walked backwards by clipMismatches; when it reports
 * bases to clip, a new CIGAR is written at the end of cigarBuffer_ with the last ALIGN
 * shortened and the trailing SOFT_CLIP extended, and the fragment is re-pointed at it.
 *
 * \param contigList       reference contigs; indexed with fragmentMetadata.contigId
 * \param fragmentMetadata fragment to clip. On success cigarBuffer, cigarOffset, cigarLength,
 *                         observedLength and editDistance are updated.
 *
 * \return true if the fragment CIGAR was rewritten, false if nothing was changed
 */
bool SemialignedEndsClipper::clipRightSide(
    const std::vector<reference::Contig> &contigList,
    FragmentMetadata &fragmentMetadata)
{
    if (0 == fragmentMetadata.cigarLength)
    {
        // No CIGAR components at all: there is nothing to clip. Without this guard the
        // lookup below would use index cigarOffset - 1, which is either a component that
        // does not belong to this fragment or (for offset 0) a wrapped-around index.
        return false;
    }

    const Read &read = fragmentMetadata.getRead();

    // Reverse iterator positioned at the last base of the strand sequence.
    std::reverse_iterator<std::vector<char>::const_iterator> sequenceRBegin(
        read.getStrandSequence(fragmentMetadata.reverse).end());
    const unsigned oldCigarOffset = fragmentMetadata.cigarOffset;
    unsigned oldCigarLength = fragmentMetadata.cigarLength;
    // Last component of the existing CIGAR: (length, opcode).
    Cigar::Component operation = Cigar::decode(
        fragmentMetadata.cigarBuffer->at(oldCigarOffset + oldCigarLength - 1));
    unsigned softClippedEndBases = 0;
    if (Cigar::SOFT_CLIP == operation.second)
    {
        if (2 > fragmentMetadata.cigarLength)
        {
            //when the adapter sequence
            // happens to be at the extremities of the read, the whole read gets clipped away.
            return false;
        }

        // Step over the existing soft clip: remember its length so it can be merged with
        // the new one, skip the corresponding bases, and look at the component before it.
        --oldCigarLength;
        softClippedEndBases = operation.first;
        sequenceRBegin += operation.first;
        operation = Cigar::decode(fragmentMetadata.cigarBuffer->at(oldCigarOffset + oldCigarLength - 1));
    }

    if (Cigar::ALIGN == operation.second)
    {
        unsigned mappedEndBases = operation.first;
        // Backward range over the bases covered by the last ALIGN component.
        std::reverse_iterator<std::vector<char>::const_iterator> sequenceREnd = sequenceRBegin + mappedEndBases;

        const std::vector<char> &reference = contigList.at(fragmentMetadata.contigId).forward_;
        // Backward range over the reference, starting just past the last observed reference
        // base of the fragment and running to the beginning of the contig.
        std::reverse_iterator<std::vector<char>::const_iterator> referenceRBegin(reference.begin() + fragmentMetadata.position +
            fragmentMetadata.getObservedLength());
        std::reverse_iterator<std::vector<char>::const_iterator> referenceREnd(reference.begin());

        // clipped.first is applied below as the number of bases to clip, clipped.second as
        // the amount to remove from the edit distance. The last argument is an identity functor.
        std::pair<unsigned, unsigned> clipped = clipMismatches<CONSECUTIVE_MATCHES_MIN>(sequenceRBegin, sequenceREnd,
                                                                   referenceRBegin, referenceREnd,
                                                                   [](char c){return c;});

        if (clipped.first)
        {
            // The rewritten CIGAR is appended to this clipper's own buffer.
            fragmentMetadata.cigarOffset = cigarBuffer_.size();
            fragmentMetadata.observedLength -= clipped.first;
            softClippedEndBases += clipped.first;
            fragmentMetadata.editDistance -= clipped.second;
            mappedEndBases -= clipped.first;
            // Copy every old component that precedes the last ALIGN unchanged.
            // NOTE(review): after a successful call fragmentMetadata.cigarBuffer points at
            // cigarBuffer_; if this function can run again on such a fragment, this insert
            // copies a range of the container into itself - confirm that is safe for Cigar.
            cigarBuffer_.insert(cigarBuffer_.end(),
                                fragmentMetadata.cigarBuffer->begin() + oldCigarOffset,
                                fragmentMetadata.cigarBuffer->begin() + oldCigarOffset + oldCigarLength - 1);
            // Shortened ALIGN followed by the merged (old + new) trailing soft clip.
            cigarBuffer_.addOperation(mappedEndBases, Cigar::ALIGN);
            cigarBuffer_.addOperation(softClippedEndBases, Cigar::SOFT_CLIP);
            fragmentMetadata.cigarBuffer = &cigarBuffer_;
            fragmentMetadata.cigarLength = cigarBuffer_.size() - fragmentMetadata.cigarOffset;
            return true;
        }
    }
    return false;
}