void SAMOutput::SetSoftClip(T_AlignmentCandidate &alignment, T_Sequence &read, DNALength hardClipPrefix, DNALength hardClipSuffix, DNALength &softClipPrefix, DNALength &softClipSuffix) { DNALength qStart, qEnd; qStart = alignment.QAlignStart(); qEnd = alignment.QAlignEnd(); assert(qStart >= hardClipPrefix); softClipPrefix = alignment.QAlignStart() - hardClipPrefix; assert(alignment.QAlignEnd() + hardClipSuffix <= read.length); softClipSuffix = read.length - hardClipSuffix - alignment.QAlignEnd(); }
void SAMOutput::SetHardClip(T_AlignmentCandidate &alignment, T_Sequence &read, DNALength &prefixClip, DNALength &suffixClip) { // // Set the hard clipping assuming the read is in the forward // direction. // prefixClip = alignment.QAlignStart(); suffixClip = read.length - alignment.QAlignEnd(); if (alignment.tStrand == 1) { // // If the read is instead reverse, swap the clipping lengths. // std::swap(prefixClip, suffixClip); } }
void IntervalOutput::Print(T_AlignmentCandidate &alignment, std::ostream &outFile) { int mapQV = alignment.mapQV; int lastBlock = alignment.blocks.size() - 1; if (lastBlock < 0) return; outFile << alignment.qName << " " << alignment.tName << " " << alignment.score << " " << alignment.pctSimilarity << " " << alignment.qStrand << " " << alignment.QAlignStart() << " " << alignment.QAlignEnd() << " " << alignment.qLength << " " << alignment.tStrand << " " << alignment.TAlignStart() << " " << (alignment.tAlignedSeqPos + alignment.tPos + alignment.blocks[lastBlock].tPos + alignment.blocks[lastBlock].length) << " " << alignment.tLength << " " << mapQV << std::endl; //Remove the last four fields from m4 format. //<< " " << alignment.nCells << " " << alignment.clusterScore //<< " " << alignment.probScore << " " //<< alignment.numSignificantClusters }
void SAMOutput::SetAlignedSequence(T_AlignmentCandidate &alignment, T_Sequence &read, T_Sequence &alignedSeq, Clipping clipping) { // // In both no, and hard clipping, the dna sequence that is output // solely corresponds to the aligned sequence. // DNALength clippedReadLength = 0; DNALength clippedStartPos = 0; if (clipping == none or clipping == hard) { DNALength qStart = alignment.QAlignStart(); DNALength qEnd = alignment.QAlignEnd(); clippedReadLength = qEnd - qStart; clippedStartPos = qStart; } else if (clipping == soft) { clippedReadLength = read.length - read.lowQualityPrefix - read.lowQualitySuffix; clippedStartPos = read.lowQualityPrefix; } else if (clipping == subread) { clippedReadLength = read.subreadEnd - read.subreadStart; clippedStartPos = read.subreadStart; } else { std::cout <<" ERROR! The clipping must be none, hard, subread, or soft when setting the aligned sequence." << std::endl; assert(0); } // // Set the aligned sequence according to the clipping boundaries. // if (alignment.tStrand == 0) { alignedSeq.ReferenceSubstring(read, clippedStartPos, clippedReadLength); } else { T_Sequence subSeq; subSeq.ReferenceSubstring(read, clippedStartPos, clippedReadLength); subSeq.MakeRC(alignedSeq); assert(alignedSeq.deleteOnExit); } }
void SAMOutput::PrintAlignment(T_AlignmentCandidate &alignment, T_Sequence &read, std::ostream &samFile, AlignmentContext &context, SupplementalQVList & qvList, Clipping clipping, bool cigarUseSeqMatch) { std::string cigarString; uint16_t flag; T_Sequence alignedSequence; DNALength prefixSoftClip = 0, suffixSoftClip = 0; DNALength prefixHardClip = 0, suffixHardClip = 0; CreateCIGARString(alignment, read, cigarString, clipping, prefixSoftClip, suffixSoftClip, prefixHardClip, suffixHardClip, cigarUseSeqMatch); SetAlignedSequence(alignment, read, alignedSequence, clipping); BuildFlag(alignment, context, flag); samFile << alignment.qName << "\t" << flag << "\t" << alignment.tName << "\t"; // RNAME if (alignment.tStrand == 0) { samFile << alignment.TAlignStart() + 1 << "\t"; // POS, add 1 to get 1 based coordinate system } else { samFile << alignment.tLength - (alignment.TAlignStart() + alignment.TEnd()) + 1 << "\t"; // includes - 1 for rev-comp, +1 for one-based } samFile << (int) alignment.mapQV << "\t"// MAPQ << cigarString << "\t"; // CIGAR // // Determine RNEXT std::string rNext; rNext = "*"; /* if (context.hasNextSubreadPos == false) { rNext = "*"; } else { if (context.rNext == alignment.tName) { rNext = "="; } else { rNext = context.rNext; } } */ samFile << rNext << "\t"; // RNEXT DNALength nextSubreadPos = 0; /* if (context.hasNextSubreadPos) { nextSubreadPos = context.nextSubreadPos + 1; }*/ samFile << nextSubreadPos << "\t"; // RNEXT, add 1 for 1 based // indexing //DNALength tLen = alignment.GenomicTEnd() - alignment.GenomicTBegin(); //SAM v1.5, tLen is set as 0 for single-segment template samFile << 0 << "\t"; // TLEN // Print the sequence on one line, and suppress printing the // newline (by setting the line length to alignedSequence.length (static_cast<DNASequence*>(&alignedSequence))->PrintSeq(samFile, 0); // SEQ samFile << "\t"; if (alignedSequence.qual.data != NULL && qvList.useqv == 0) { alignedSequence.PrintAsciiQual(samFile, 0); // QUAL } else { samFile <<"*"; } samFile << "\t"; // // Add optional fields // samFile << "RG:Z:" << context.readGroupId << "\t"; samFile << "AS:i:" << alignment.score << "\t"; // // "RG" read group Id // "AS" alignment score // "XS" read alignment start position without counting previous soft clips (1 based) // "XE" read alignment end position without counting previous soft clips (1 based) // "XL" aligned read length // "XQ" query sequence length // "XT" # of continues reads, always 1 for blasr // "NM" edit distance // "FI" read alignment start position (1 based) // DNALength qAlignStart = alignment.QAlignStart(); DNALength qAlignEnd = alignment.QAlignEnd(); if (clipping == none) { samFile << "XS:i:" << qAlignStart + 1 << "\t"; samFile << "XE:i:" << qAlignEnd + 1 << "\t"; } else if (clipping == hard or clipping == soft or clipping == subread) { DNALength xs = prefixHardClip; DNALength xe = read.length - suffixHardClip; if (alignment.tStrand == 1) { xs = suffixHardClip; xe = read.length - prefixHardClip; } samFile << "XS:i:" << xs + 1 << "\t"; // add 1 for 1-based indexing in sam assert(read.length - suffixHardClip == prefixHardClip + alignedSequence.length); samFile << "XE:i:" << xe + 1 << "\t"; } samFile << "YS:i:" << read.subreadStart << "\t"; samFile << "YE:i:" << read.subreadEnd << "\t"; samFile << "ZM:i:" << read.zmwData.holeNumber << "\t"; samFile << "XL:i:" << alignment.qAlignedSeq.length << "\t"; samFile << "XT:i:1\t"; // reads are allways continuous reads, not // referenced based circular consensus when // output by blasr. samFile << "NM:i:" << context.editDist << "\t"; samFile << "FI:i:" << alignment.qAlignedSeqPos + 1; // Add query sequence length samFile << "\t" << "XQ:i:" << alignment.qLength; // // Write out optional quality values. If qvlist does not // have any qv's signaled to print, this is a no-op. // // First transform characters that are too large to printable ones. qvList.FormatQVOptionalFields(alignedSequence); qvList.PrintQVOptionalFields(alignedSequence, samFile); samFile << std::endl; }