unsigned UngappedAligner::alignUngapped( FragmentMetadata &fragmentMetadata, Cigar &cigarBuffer, const flowcell::ReadMetadata &readMetadata, const matchSelector::FragmentSequencingAdapterClipper &adapterClipper, const reference::ContigList &contigList, const isaac::reference::ContigAnnotations &contigAnnotations) const { const unsigned cigarOffset = cigarBuffer.size(); // Don't reset alignment to preserve the seed-based anchors. // fragmentMetadata.resetAlignment(); ISAAC_ASSERT_MSG(!fragmentMetadata.isAligned(), "alignUngapped is expected to be performend on a clean fragment"); fragmentMetadata.resetClipping(); const reference::Contig &contig = contigList[fragmentMetadata.contigId]; const Read &read = fragmentMetadata.getRead(); const bool reverse = fragmentMetadata.reverse; const std::vector<char> &sequence = read.getStrandSequence(reverse); const reference::Contig &reference = contig; std::vector<char>::const_iterator sequenceBegin = sequence.begin(); std::vector<char>::const_iterator sequenceEnd = sequence.end(); adapterClipper.clip(contig, fragmentMetadata, sequenceBegin, sequenceEnd); clipReadMasking(read, fragmentMetadata, sequenceBegin, sequenceEnd); clipReference(reference.size(), fragmentMetadata, sequenceBegin, sequenceEnd); const unsigned firstMappedBaseOffset = std::distance(sequence.begin(), sequenceBegin); if (firstMappedBaseOffset) { cigarBuffer.addOperation(firstMappedBaseOffset, Cigar::SOFT_CLIP); } const unsigned mappedBases = std::distance(sequenceBegin, sequenceEnd); if (mappedBases) { const Cigar::OpCode opCode = Cigar::ALIGN; cigarBuffer.addOperation(mappedBases, opCode); } const unsigned clipEndBases = std::distance(sequenceEnd, sequence.end()); if (clipEndBases) { cigarBuffer.addOperation(clipEndBases, Cigar::SOFT_CLIP); } const unsigned ret = updateFragmentCigar( readMetadata, contigList, contigAnnotations, fragmentMetadata, fragmentMetadata.reverse, fragmentMetadata.contigId, fragmentMetadata.position, cigarBuffer, cigarOffset); if (!ret) 
{ fragmentMetadata.setUnaligned(); } return ret; }
/**
 * \brief Narrows [sequenceBegin, sequenceEnd) so that it excludes the masked cycles of the read.
 *
 * For a reverse-strand fragment the masked end cycles are at the start of the reverse
 * sequence and the masked begin cycles are at its end; for a forward-strand fragment
 * it is the other way round. Every base removed here is reported to the fragment through
 * incrementClipLeft / incrementClipRight. Any position adjustment is left to those calls.
 *
 * \param read          read providing the sequences and the masked cycle counts
 * \param fragment      fragment whose strand is used and whose clipping counters are updated
 * \param sequenceBegin in/out: moved forward if it lies before the first unmasked base
 * \param sequenceEnd   in/out: moved backward if it lies past the last unmasked base
 */
void AlignerBase::clipReadMasking(
    const alignment::Read &read,
    FragmentMetadata &fragment,
    std::vector<char>::const_iterator &sequenceBegin,
    std::vector<char>::const_iterator &sequenceEnd)
{
    const std::vector<char>::const_iterator unmaskedBegin = fragment.reverse
        ? read.getReverseSequence().begin() + read.getEndCyclesMasked()
        : read.getForwardSequence().begin() + read.getBeginCyclesMasked();

    const std::vector<char>::const_iterator unmaskedEnd = fragment.reverse
        ? read.getReverseSequence().end() - read.getBeginCyclesMasked()
        : read.getForwardSequence().end() - read.getEndCyclesMasked();

    // Only ever shrink the range: earlier clipping may already be tighter than the mask.
    if (sequenceBegin < unmaskedBegin)
    {
        fragment.incrementClipLeft(std::distance(sequenceBegin, unmaskedBegin));
        sequenceBegin = unmaskedBegin;
    }

    if (sequenceEnd > unmaskedEnd)
    {
        fragment.incrementClipRight(std::distance(unmaskedEnd, sequenceEnd));
        sequenceEnd = unmaskedEnd;
    }
}
bool alignsCorrectly(const std::size_t readNumber, const FragmentMetadata &fragment) { const reference::ReferencePosition oriPos = getAlignmentPositionFromName(readNumber, fragment); if (oriPos.isTooManyMatch()) { return true; } // ISAAC_THREAD_CERR << "oriPos:" << oriPos << " name " << common::makeFastIoString(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd()) << std::endl; return fragment.isReverse() == oriPos.reverse() && fragment.getContigId() == oriPos.getContigId() && uint64_t(fragment.getPosition()) == oriPos.getPosition(); }
/**
 * \brief Applies the CIGAR stored in cigarBuffer at cigarOffset to the fragment.
 *
 * Forwards all arguments, together with the aligner's alignmentCfg_, to
 * FragmentMetadata::updateAlignment.
 *
 * \param readMetadataList  forwarded to updateAlignment
 * \param contigList        forwarded to updateAlignment
 * \param contigAnnotations forwarded to updateAlignment
 * \param fragmentMetadata  fragment being updated
 * \param contigId          contig of the alignment
 * \param strandPosition    position of the alignment
 * \param cigarBuffer       buffer holding the CIGAR operations
 * \param cigarOffset       offset of the first operation of this fragment in cigarBuffer
 *
 * \return the value returned by FragmentMetadata::updateAlignment
 */
unsigned AlignerBase::updateFragmentCigar(
    const flowcell::ReadMetadataList &readMetadataList,
    const reference::ContigList &contigList,
    const isaac::reference::ContigAnnotations &contigAnnotations,
    FragmentMetadata &fragmentMetadata,
    unsigned contigId,
    const long strandPosition,
    const Cigar &cigarBuffer,
    const unsigned cigarOffset) const
{
    const unsigned updateResult = fragmentMetadata.updateAlignment(
        alignmentCfg_,
        readMetadataList,
        contigList,
        contigAnnotations,
        contigId,
        strandPosition,
        cigarBuffer,
        cigarOffset);
    return updateResult;
}
/** * \return true, if the clipping changed the alignment position */ bool SemialignedEndsClipper::clip( const std::vector<reference::Contig> &contigList, FragmentMetadata &fragmentMetadata) { if (!fragmentMetadata.isAligned() || fragmentMetadata.gapCount) { return false; } bool ret = clipLeftSide(contigList, fragmentMetadata); if (clipRightSide(contigList, fragmentMetadata)) { ret = true; } if (ret) { ISAAC_THREAD_CERR_DEV_TRACE(" SemialignedEndsClipper::clip: " << fragmentMetadata); } return ret; }
/**
 * \brief Parses the reference position encoded in the cluster name for the given read.
 *
 * The per-read part of the name is obtained from getReadName. Its first character is
 * the strand marker ('r' means reverse, 'u' is rejected by an assertion). The contig id
 * is parsed starting at offset 2 and the position starting right after the first ':'
 * found at or after offset 2.
 *
 * \param readNumber 1-based read number
 * \param fragment   fragment whose cluster name is parsed
 *
 * \return TooManyMatch position if the per-read name is empty, NoMatch position after the
 *         'u' assertion, otherwise the parsed position
 */
reference::ReferencePosition getAlignmentPositionFromName(const std::size_t readNumber, const FragmentMetadata &fragment)
{
    // numbers are 1-based, getReadName takes a 0-based index
    const auto readName = getReadName(readNumber - 1, fragment);
    if (readName.first == readName.second)
    {
        return reference::ReferencePosition(reference::ReferencePosition::TooManyMatch);
    }

    const char strandMarker = *readName.first;
    if ('u' == strandMarker)
    {
        ISAAC_ASSERT_MSG(false, common::makeFastIoString(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd()) << " " << fragment);
        return reference::ReferencePosition(reference::ReferencePosition::NoMatch);
    }

    // NOTE(review): nothing checks that the name is at least 3 characters long or that
    // it contains ':'; a malformed name makes the parsing below start past its end.
    const auto colon = std::find(readName.first + 2, readName.second, ':');
    const long contigId = std::atol(&*readName.first + 2);
    const long position = std::atol(&*colon + 1);
    const bool isReverse = 'r' == strandMarker;

    return reference::ReferencePosition(contigId, position, false, isReverse);
}
std::pair<BclClusters::const_iterator, BclClusters::const_iterator> getReadName(const std::size_t readIndex, const FragmentMetadata &fragment) { std::pair<BclClusters::const_iterator, BclClusters::const_iterator> ret = {fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd()}; if (ret.second == ret.first) { return ret; } if (!readIndex) { ret.first = fragment.getCluster().nameBegin(); ret.second = std::find(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd(), '-'); } else { ret.first = std::find(fragment.getCluster().nameBegin(), fragment.getCluster().nameEnd(), '-') + 1; ret.second = fragment.getCluster().nameEnd(); } return ret; }
/**
 * \brief Soft-clips poorly matching bases at the right end of an ungapped alignment.
 *
 * Looks at the last CIGAR component of the fragment (skipping a trailing SOFT_CLIP if
 * there is one). If that component is an ALIGN, the read and the reference are walked
 * backwards from the right end of the alignment and clipMismatches decides how many
 * bases to clip. When it clips anything, a new CIGAR is written to the end of
 * cigarBuffer_ and the fragment is re-pointed to it.
 *
 * \param contigList        reference contigs, indexed by fragmentMetadata.contigId
 * \param fragmentMetadata  fragment to clip; its cigarBuffer, cigarOffset, cigarLength,
 *                          observedLength and editDistance are updated on success
 *
 * \return true if bases were clipped and the fragment CIGAR was rewritten, false otherwise
 */
bool SemialignedEndsClipper::clipRightSide(
    const std::vector<reference::Contig> &contigList,
    FragmentMetadata &fragmentMetadata)
{
    const Read &read = fragmentMetadata.getRead();

    // Reverse iterator starting at the last base of the strand sequence.
    std::reverse_iterator<std::vector<char>::const_iterator> sequenceRBegin(
        read.getStrandSequence(fragmentMetadata.reverse).end());

    unsigned oldCigarOffset = fragmentMetadata.cigarOffset;
    unsigned oldCigarLength = fragmentMetadata.cigarLength;

    // Last component of the existing CIGAR: .first is the length, .second the operation code.
    Cigar::Component operation = Cigar::decode(
        fragmentMetadata.cigarBuffer->at(oldCigarOffset + oldCigarLength - 1));

    unsigned softClippedEndBases = 0;
    if (Cigar::SOFT_CLIP == operation.second)
    {
        if (2 > fragmentMetadata.cigarLength)
        {
            // The CIGAR consists of a single soft clip: when the adapter sequence
            // happens to be at the extremities of the read, the whole read gets clipped away.
            return false;
        }
        // Drop the trailing soft clip from the part of the CIGAR that will be copied,
        // remember its length and skip the corresponding bases of the read.
        --oldCigarLength;
        softClippedEndBases = operation.first;
        sequenceRBegin += operation.first;
        operation = Cigar::decode(fragmentMetadata.cigarBuffer->at(oldCigarOffset + oldCigarLength - 1));
    }

    // Only a trailing ALIGN component can be shortened; anything else is left alone.
    if (Cigar::ALIGN == operation.second)
    {
        unsigned mappedEndBases = operation.first;
        std::reverse_iterator<std::vector<char>::const_iterator> sequenceREnd = sequenceRBegin + mappedEndBases;

        const std::vector<char> &reference = contigList.at(fragmentMetadata.contigId).forward_;
        // Reference is walked backwards starting from position + observed length.
        std::reverse_iterator<std::vector<char>::const_iterator> referenceRBegin(reference.begin() + fragmentMetadata.position + fragmentMetadata.getObservedLength());
        std::reverse_iterator<std::vector<char>::const_iterator> referenceREnd(reference.begin());

        // clipped.first is subtracted from the aligned length below, clipped.second from the
        // edit distance (presumably the mismatches among the clipped bases - confirm in
        // clipMismatches). The lambda passes reference bases through unchanged.
        std::pair<unsigned, unsigned> clipped = clipMismatches<CONSECUTIVE_MATCHES_MIN>(sequenceRBegin, sequenceREnd, referenceRBegin, referenceREnd, [](char c){return c;});

        if (clipped.first)
        {
            // The rewritten CIGAR is appended to cigarBuffer_, so it starts at its current size.
            fragmentMetadata.cigarOffset = cigarBuffer_.size();
            fragmentMetadata.observedLength -= clipped.first;
            softClippedEndBases += clipped.first;
            fragmentMetadata.editDistance -= clipped.second;
            mappedEndBases -= clipped.first;
            // Copy every component that precedes the trailing ALIGN, then append the
            // shortened ALIGN followed by the extended SOFT_CLIP.
            // NOTE(review): this function re-points fragmentMetadata.cigarBuffer at cigarBuffer_,
            // so on a later clip of the same fragment the source range may lie inside the
            // destination container - verify that Cigar::insert is safe in that case.
            cigarBuffer_.insert(cigarBuffer_.end(), fragmentMetadata.cigarBuffer->begin() + oldCigarOffset,
                                fragmentMetadata.cigarBuffer->begin() + oldCigarOffset + oldCigarLength - 1);
            cigarBuffer_.addOperation(mappedEndBases, Cigar::ALIGN);
            cigarBuffer_.addOperation(softClippedEndBases, Cigar::SOFT_CLIP);
            fragmentMetadata.cigarBuffer = &cigarBuffer_;
            fragmentMetadata.cigarLength = cigarBuffer_.size() - fragmentMetadata.cigarOffset;
            return true;
        }
    }
    return false;
}