Example #1
0
/**
 * \brief Builds a gap-free CIGAR (optional soft clip, aligned run, optional soft clip)
 *        for the fragment at its current contig/position and applies it to the fragment.
 *
 * The strand sequence is narrowed, in this order, by the adapter clipper, by the
 * read masking and by the reference boundaries. Whatever is trimmed from either
 * end is emitted as a soft clip. The operations are appended to cigarBuffer.
 *
 * \param fragmentMetadata  fragment to align; must not be aligned yet
 * \param cigarBuffer       buffer that receives the new CIGAR operations
 * \return the value produced by updateFragmentCigar; when it is zero the fragment
 *         is marked as unaligned
 */
unsigned UngappedAligner::alignUngapped(
    FragmentMetadata &fragmentMetadata,
    Cigar &cigarBuffer,
    const flowcell::ReadMetadata &readMetadata,
    const matchSelector::FragmentSequencingAdapterClipper &adapterClipper,
    const reference::ContigList &contigList,
    const isaac::reference::ContigAnnotations &contigAnnotations) const
{
    // Everything appended from here on belongs to this fragment's CIGAR.
    const unsigned cigarOffset = cigarBuffer.size();

    // resetAlignment() is intentionally not called: the seed-based anchors must survive.
    ISAAC_ASSERT_MSG(!fragmentMetadata.isAligned(), "alignUngapped is expected to be performend on a clean fragment");
    fragmentMetadata.resetClipping();

    const reference::Contig &contig = contigList[fragmentMetadata.contigId];
    const Read &read = fragmentMetadata.getRead();
    const std::vector<char> &strandBases = read.getStrandSequence(fragmentMetadata.reverse);

    // [alignedBegin, alignedEnd) shrinks as each clipping stage is applied.
    std::vector<char>::const_iterator alignedBegin = strandBases.begin();
    std::vector<char>::const_iterator alignedEnd = strandBases.end();

    adapterClipper.clip(contig, fragmentMetadata, alignedBegin, alignedEnd);
    clipReadMasking(read, fragmentMetadata, alignedBegin, alignedEnd);
    clipReference(contig.size(), fragmentMetadata, alignedBegin, alignedEnd);

    const unsigned leadingClip = std::distance(strandBases.begin(), alignedBegin);
    const unsigned alignedLength = std::distance(alignedBegin, alignedEnd);
    const unsigned trailingClip = std::distance(alignedEnd, strandBases.end());

    // Zero-length operations are never written to the buffer.
    if (0 != leadingClip)
    {
        cigarBuffer.addOperation(leadingClip, Cigar::SOFT_CLIP);
    }
    if (0 != alignedLength)
    {
        cigarBuffer.addOperation(alignedLength, Cigar::ALIGN);
    }
    if (0 != trailingClip)
    {
        cigarBuffer.addOperation(trailingClip, Cigar::SOFT_CLIP);
    }

    const unsigned updateResult = updateFragmentCigar(
        readMetadata, contigList, contigAnnotations, fragmentMetadata,
        fragmentMetadata.reverse, fragmentMetadata.contigId, fragmentMetadata.position, cigarBuffer, cigarOffset);

    if (0 == updateResult)
    {
        fragmentMetadata.setUnaligned();
    }

    return updateResult;
}
Example #2
0
/**
 * \brief Narrows [sequenceBegin, sequenceEnd) so that it excludes the masked cycles
 *        of the read, and records any additional clipping on the fragment.
 *
 * For a reverse-strand fragment the end-of-read mask applies to the start of the
 * sequence and the begin-of-read mask to its end; for a forward-strand fragment
 * it is the other way around.
 *
 * \param read           read providing the sequences and the masked cycle counts
 * \param fragment       receives incrementClipLeft/incrementClipRight for the bases removed here
 * \param sequenceBegin  in/out: only ever moved forward
 * \param sequenceEnd    in/out: only ever moved backward
 */
void AlignerBase::clipReadMasking(
    const alignment::Read &read,
    FragmentMetadata &fragment,
    std::vector<char>::const_iterator &sequenceBegin,
    std::vector<char>::const_iterator &sequenceEnd)
{
    const bool onReverseStrand = fragment.reverse;
    const std::vector<char> &strand =
        onReverseStrand ? read.getReverseSequence() : read.getForwardSequence();

    // Boundaries of the unmasked region, expressed on the selected strand.
    const std::vector<char>::const_iterator unmaskedBegin =
        strand.begin() + (onReverseStrand ? read.getEndCyclesMasked() : read.getBeginCyclesMasked());
    const std::vector<char>::const_iterator unmaskedEnd =
        strand.end() - (onReverseStrand ? read.getBeginCyclesMasked() : read.getEndCyclesMasked());

    // Clip only when the mask reaches further in than what is already clipped.
    if (sequenceBegin < unmaskedBegin)
    {
        fragment.incrementClipLeft(std::distance(sequenceBegin, unmaskedBegin));
        sequenceBegin = unmaskedBegin;
    }

    if (sequenceEnd > unmaskedEnd)
    {
        fragment.incrementClipRight(std::distance(unmaskedEnd, sequenceEnd));
        sequenceEnd = unmaskedEnd;
    }
}
Example #3
0
bool alignsCorrectly(const std::size_t readNumber, const FragmentMetadata &fragment)
{
    const reference::ReferencePosition oriPos = getAlignmentPositionFromName(readNumber, fragment);
    if (oriPos.isTooManyMatch())
    {
        return true;
    }
//    ISAAC_THREAD_CERR << "oriPos:" << oriPos << " name " << common::makeFastIoString(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd()) << std::endl;
    return fragment.isReverse() == oriPos.reverse() &&
        fragment.getContigId() == oriPos.getContigId() &&
        uint64_t(fragment.getPosition()) == oriPos.getPosition();
}
Example #4
0
/**
 * \brief Forwards the freshly built CIGAR to FragmentMetadata::updateAlignment,
 *        supplying this aligner's alignment configuration.
 *
 * \param contigId        contig the fragment is placed on
 * \param strandPosition  position passed through to updateAlignment
 * \param cigarBuffer     buffer holding the CIGAR operations
 * \param cigarOffset     offset of the fragment's first operation within cigarBuffer
 * \return whatever updateAlignment returns, converted to unsigned
 */
unsigned AlignerBase::updateFragmentCigar(
    const flowcell::ReadMetadataList &readMetadataList,
    const reference::ContigList &contigList,
    const isaac::reference::ContigAnnotations &contigAnnotations,
    FragmentMetadata &fragmentMetadata,
    unsigned contigId,
    const long strandPosition,
    const Cigar &cigarBuffer,
    const unsigned cigarOffset) const
{
    const unsigned updateResult = fragmentMetadata.updateAlignment(
        alignmentCfg_,
        readMetadataList,
        contigList,
        contigAnnotations,
        contigId,
        strandPosition,
        cigarBuffer,
        cigarOffset);
    return updateResult;
}
/**
 * \brief Attempts to clip both ends of an aligned, gap-free fragment.
 *
 * Fragments that are not aligned or that contain gaps are left untouched.
 * Both sides are always attempted, left side first.
 *
 * \return true, if at least one of the two sides was clipped
 */
bool SemialignedEndsClipper::clip(
    const std::vector<reference::Contig> &contigList,
    FragmentMetadata &fragmentMetadata)
{
    const bool eligible = fragmentMetadata.isAligned() && !fragmentMetadata.gapCount;
    if (!eligible)
    {
        return false;
    }

    const bool leftClipped = clipLeftSide(contigList, fragmentMetadata);
    const bool rightClipped = clipRightSide(contigList, fragmentMetadata);
    const bool anyClipped = leftClipped || rightClipped;

    if (anyClipped)
    {
        ISAAC_THREAD_CERR_DEV_TRACE(" SemialignedEndsClipper::clip: " << fragmentMetadata);
    }
    return anyClipped;
}
Example #6
0
/**
 * \brief Decodes the reference position embedded in the cluster name for the given read.
 *
 * The per-read part of the name is obtained from getReadName. Its first character
 * selects the outcome: 'u' is treated as an error (assertion), 'r' marks the position
 * as reverse. Two numbers are then parsed: one starting two characters into the name,
 * the other right after the next ':'.
 *
 * NOTE(review): the code assumes that a ':' follows the first number and that the name
 * buffer is terminated by a character std::atol stops on - neither is checked here.
 *
 * \param readNumber 1-based read number
 * \return TooManyMatch position if the name is empty, otherwise the decoded position
 */
reference::ReferencePosition getAlignmentPositionFromName(const std::size_t readNumber, const FragmentMetadata &fragment)
{
    // getReadName takes a 0-based index.
    const auto readName = getReadName(readNumber - 1, fragment);

    if (readName.first == readName.second)
    {
        return reference::ReferencePosition(reference::ReferencePosition::TooManyMatch);
    }

    const auto strandTag = *readName.first;
    if ('u' == strandTag)
    {
        ISAAC_ASSERT_MSG(false, common::makeFastIoString(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd()) << " " << fragment);
        return reference::ReferencePosition(reference::ReferencePosition::NoMatch);
    }

    // First number: starts after the strand tag and the character that follows it.
    const long firstNumber = std::atol(&*readName.first + 2);
    // Second number: starts right after the ':' that terminates the first one.
    const long secondNumber = std::atol(&*std::find(readName.first + 2, readName.second, ':') + 1);
    const bool isReverse = ('r' == strandTag);

    return reference::ReferencePosition(firstNumber, secondNumber, false, isReverse);
}
Example #7
0
std::pair<BclClusters::const_iterator, BclClusters::const_iterator> getReadName(const std::size_t readIndex, const FragmentMetadata &fragment)
{
    std::pair<BclClusters::const_iterator, BclClusters::const_iterator> ret = {fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd()};
    if (ret.second == ret.first)
    {
        return ret;
    }

    if (!readIndex)
    {
        ret.first = fragment.getCluster().nameBegin();
        ret.second = std::find(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd(), '-');
    }
    else
    {
        ret.first = std::find(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd(), '-') + 1;
        ret.second = fragment.getCluster().nameEnd();
    }

    return ret;
}
/**
 * \brief Extends the soft clip at the right end of the fragment over the bases that
 *        clipMismatches reports as clippable, rewriting the fragment's CIGAR into cigarBuffer_.
 *
 * Works backwards from the end of the strand sequence and from the reference position
 * just past the fragment's observed length. Only acts when the last non-soft-clip
 * CIGAR operation is an ALIGN.
 *
 * \return true if bases were clipped and the fragment's CIGAR, observed length and
 *         edit distance were updated; false otherwise
 */
bool SemialignedEndsClipper::clipRightSide(
    const std::vector<reference::Contig> &contigList,
    FragmentMetadata &fragmentMetadata)
{
    const Read &read = fragmentMetadata.getRead();

    // Walk the read from its last base towards the first.
    std::reverse_iterator<std::vector<char>::const_iterator> sequenceRBegin(
        read.getStrandSequence(fragmentMetadata.reverse).end());
    unsigned oldCigarOffset = fragmentMetadata.cigarOffset;
    unsigned oldCigarLength = fragmentMetadata.cigarLength;
    // Last operation of the fragment's current CIGAR.
    Cigar::Component operation = Cigar::decode(
        fragmentMetadata.cigarBuffer->at(oldCigarOffset + oldCigarLength - 1));
    unsigned softClippedEndBases = 0;
    if (Cigar::SOFT_CLIP == operation.second)
    {
        if (2 > fragmentMetadata.cigarLength)
        {
            // The CIGAR consists of this soft clip only. This happens when the adapter sequence
            // is at the extremities of the read and the whole read gets clipped away.
            return false;
        }

        // Skip the existing soft clip: remember its length, step over its bases and
        // look at the operation that precedes it.
        --oldCigarLength;
        softClippedEndBases = operation.first;
        sequenceRBegin += operation.first;
        operation = Cigar::decode(fragmentMetadata.cigarBuffer->at(oldCigarOffset + oldCigarLength - 1));
    }

    if (Cigar::ALIGN == operation.second)
    {
        unsigned mappedEndBases = operation.first;
        // The read side of the comparison is limited to the bases of this ALIGN operation.
        std::reverse_iterator<std::vector<char>::const_iterator> sequenceREnd = sequenceRBegin + mappedEndBases;

        const std::vector<char> &reference = contigList.at(fragmentMetadata.contigId).forward_;
        // The reference is walked backwards starting just past the last reference base
        // covered by the fragment, down to the start of the contig.
        std::reverse_iterator<std::vector<char>::const_iterator> referenceRBegin(reference.begin() + fragmentMetadata.position +
            fragmentMetadata.getObservedLength());
        std::reverse_iterator<std::vector<char>::const_iterator> referenceREnd(reference.begin());

        // clipped.first is used below as the number of bases to clip, clipped.second as the
        // amount to take off the edit distance. The lambda passes reference bases through unchanged.
        std::pair<unsigned, unsigned> clipped = clipMismatches<CONSECUTIVE_MATCHES_MIN>(sequenceRBegin, sequenceREnd,
                                                                   referenceRBegin, referenceREnd,
                                                                   [](char c){return c;});

        if (clipped.first)
        {
            // The rewritten CIGAR is appended to cigarBuffer_, so its offset is the current end of that buffer.
            fragmentMetadata.cigarOffset = cigarBuffer_.size();
            fragmentMetadata.observedLength -= clipped.first;
            softClippedEndBases += clipped.first;
            fragmentMetadata.editDistance -= clipped.second;
            mappedEndBases -= clipped.first;
            // Copy every old operation that precedes the trailing ALIGN (the "- 1" excludes it;
            // a trailing soft clip was already excluded above).
            // NOTE(review): if fragmentMetadata.cigarBuffer already points at cigarBuffer_, the source
            // range lies inside the container being inserted into - confirm this is safe for Cigar.
            cigarBuffer_.insert(cigarBuffer_.end(),
                                fragmentMetadata.cigarBuffer->begin() + oldCigarOffset,
                                fragmentMetadata.cigarBuffer->begin() + oldCigarOffset + oldCigarLength - 1);
            // Re-emit the shortened ALIGN followed by the enlarged soft clip.
            // NOTE(review): mappedEndBases may be zero here if the whole ALIGN was clipped - confirm
            // addOperation handles a zero length as intended.
            cigarBuffer_.addOperation(mappedEndBases, Cigar::ALIGN);
            cigarBuffer_.addOperation(softClippedEndBases, Cigar::SOFT_CLIP);
            fragmentMetadata.cigarBuffer = &cigarBuffer_;
            fragmentMetadata.cigarLength = cigarBuffer_.size() - fragmentMetadata.cigarOffset;
            return true;
        }
    }
    return false;
}