// Re-aligns the query/target strings stored in `aln` by running SDPAlign and
// then GuidedAlign on them, and writes the resulting gapped strings and the
// adjusted target coordinates back into `aln`.
//
// On exit aln.qstr / aln.tstr hold the gapped alignment strings (reverse
// complemented when aln.strand is '-') and aln.start has been incremented by
// one after the coordinate update.
void SimpleAligner::align(dagcon::Alignment& aln) {
    // This alignment type is defined in the blasr code base.
    blasr::Alignment seedAln, bandedAln;

    // Point the sequence objects at the string storage already held by aln;
    // no bases are copied.
    FASTQSequence read;
    read.seq = reinterpret_cast<Nucleotide*>(const_cast<char*>(aln.qstr.c_str()));
    read.length = aln.qstr.length();

    DNASequence ref;
    ref.seq = reinterpret_cast<Nucleotide*>(const_cast<char*>(aln.tstr.c_str()));
    ref.length = aln.tstr.length();

    // First pass produces seedAln, which then guides the banded second pass.
    SDPAlign(read, ref, distScoreFn_, tupleMetrics_.tupleSize,
             config_.sdpIndel, config_.sdpIndel, config_.indelRate * 2,
             seedAln, Local);
    GuidedAlign(read, ref, seedAln, distScoreFn_, config_.bandSize, bandedAln);

    std::string gappedQuery;
    std::string matchPattern;
    std::string gappedTarget;

    // Debug aids:
    //StickPrintAlignment(seedAln, read, ref, std::cout);
    //StickPrintAlignment(bandedAln, read, ref, std::cout);

    CreateAlignmentStrings(bandedAln, read.seq, ref.seq,
                           gappedTarget, matchPattern, gappedQuery,
                           read.length, ref.length);

    // The refined alignment may cover a different target range than the
    // incoming record, so refresh the coordinates.
    // NOTE(review): end is computed from the already-shifted start; confirm
    // that GenomicTEnd() is meant to be added on top of GenomicTBegin().
    aln.start += bandedAln.GenomicTBegin();
    aln.end = aln.start + bandedAln.GenomicTEnd();

    if (aln.strand != '-') {
        aln.qstr = gappedQuery;
        aln.tstr = gappedTarget;
    } else {
        // Reverse strand: measure the start from the other end of the target
        // and store reverse-complemented strings.
        aln.start = aln.tlen - aln.end;
        aln.qstr = revComp(gappedQuery);
        aln.tstr = revComp(gappedTarget);
    }

    aln.start += 1;
}
// Re-aligns the query/target strings stored in `aln` by running SDPAlign and
// then GuidedAlign on them, and writes the resulting gapped strings and the
// adjusted start coordinate back into `aln`.
//
// On exit aln.qstr / aln.tstr hold the gapped alignment strings (reverse
// complemented when aln.strand is '-') and aln.start has been incremented by
// one after the coordinate update.
void SimpleAligner::align(dagcon::Alignment& aln) {
    // This alignment type is defined in the blasr code base.
    blasr::Alignment seedAln, bandedAln;

    // Point the read at the string storage already held by aln (no copy) and
    // give it quality buffers of matching length.
    // NOTE(review): nothing in this function releases these buffers; confirm
    // that FASTQSequence cleans them up.
    FASTQSequence read;
    read.seq = reinterpret_cast<Nucleotide*>(const_cast<char*>(aln.qstr.c_str()));
    read.length = aln.qstr.length();
    read.AllocateRichQualityValues(read.length);
    read.qual.Allocate(read.length);

    DNASequence ref;
    ref.seq = reinterpret_cast<Nucleotide*>(const_cast<char*>(aln.tstr.c_str()));
    ref.length = aln.tstr.length();

    // First pass produces seedAln, which then guides the banded second pass.
    SDPAlign(read, ref, distScoreFn_, tupleMetrics_.tupleSize,
             config_.sdpIndel, config_.sdpIndel, config_.indelRate * 2,
             seedAln, Local);
    GuidedAlign(read, ref, seedAln, distScoreFn_, config_.bandSize, bandedAln);

    // Debug aids:
    //StickPrintAlignment(seedAln, read, ref, std::cout);
    //StickPrintAlignment(bandedAln, read, ref, std::cout);

    std::string gappedQuery;
    std::string matchPattern;
    std::string gappedTarget;
    CreateAlignmentStrings(bandedAln, read.seq, ref.seq,
                           gappedTarget, matchPattern, gappedQuery,
                           read.length, ref.length);

    if (aln.strand != '-') {
        // Forward strand: shift the start by the refined target offset.
        aln.start += bandedAln.tPos;
        aln.qstr = gappedQuery;
        aln.tstr = gappedTarget;
    } else {
        // Reverse strand: measure the start from the other end of the
        // sequence and store reverse-complemented strings.
        aln.start = aln.len - (aln.end + bandedAln.tPos);
        aln.qstr = revComp(gappedQuery);
        aln.tstr = revComp(gappedTarget);
    }

    aln.start += 1;
}
void Print(T_Alignment &alignment, T_Sequence &query, T_Sequence &text, std::ostream &out, int qPrintStart = 0, int tPrintStart = 0, int maxPrintLength = 50) { (void)(qPrintStart); (void)(tPrintStart); (void)(maxPrintLength); /* * Sample alignment: * <hit name="x15_y33_1220208-0008_m081205_152444_Uni_p2_b15" unalignedLength="1301" start="1051" end="1016" strand="-" targetStart="1" targetEnd="44" targetStrand="+"> <zScore value="-6.091"/> <nInsert value="1" percent="2.86" /> <nDelete value="9" percent="25.71" /> <nMismatch value="1" percent="2.86" /> <nCorrect value="24" percent="68.57" /> <alignment><query> AG--CGTTCC-TATGG-TG-GGGTCGTTA-ACT---GTCGCCAG </query><target> AGCCCG-TCCTTATGGTTGAGGGTTGTTACACTTCGGTCGCCAG </target></alignment> </hit> */ char strand[2] = {'+', '-'}; std::string tAlignStr, alignStr, qAlignStr; CreateAlignmentStrings(alignment, query.seq, text.seq, tAlignStr, alignStr, qAlignStr); int alignLength = tAlignStr.size(); if (alignLength == 0) { alignLength = 1; // Make sure there are no divide by zero. 
alignment.nIns = 0; alignment.nDel = 0; alignment.nMismatch = 0; alignment.nMatch = 0; } out << BeginDataEntry( std::string("hit"), CreateKeywordValuePair(std::string("name"), alignment.qName) + CreateKeywordValuePair(std::string("unalignedLength"), alignment.qLength) + CreateKeywordValuePair(std::string("start"), alignment.qPos) + CreateKeywordValuePair(std::string("end"), alignment.qPos + alignment.qAlignLength) + CreateKeywordValuePair(std::string("strand"), strand[alignment.qStrand]) + CreateKeywordValuePair(std::string("targetStart"), alignment.tPos) + CreateKeywordValuePair(std::string("targetEnd"), alignment.tPos + alignment.tAlignLength) + CreateKeywordValuePair(std::string("targetStrand"), strand[alignment.tStrand])) << std::endl; out << CreateDataEntry(std::string("zScore"), CreateKeywordValuePair(std::string("value"), alignment.zScore)) << std::endl; out << CreateDataEntry(std::string("nInsert"), CreateKeywordValuePair(std::string("value"), alignment.nIns) + " " + CreateKeywordValuePair(std::string("percent"), alignment.nIns * 0.5 / alignLength)) << std::endl; out << CreateDataEntry(std::string("nDelete"), CreateKeywordValuePair(std::string("value"), alignment.nDel) + " " + CreateKeywordValuePair(std::string("percent"), alignment.nDel * 0.5 / alignLength)) << std::endl; out << CreateDataEntry(std::string("nMismatch"), CreateKeywordValuePair(std::string("value"), alignment.nMismatch) + " " + CreateKeywordValuePair(std::string("percent"), alignment.nMismatch * 0.5 / alignLength)) << std::endl; out << CreateDataEntry(std::string("nCorrect"), CreateKeywordValuePair(std::string("value"), alignment.nMatch) + " " + CreateKeywordValuePair(std::string("percent"), alignment.nMatch * 0.5 / alignLength)) << std::endl; out << CreateStartEntry(std::string("alignment"), std::string("")) << CreateStartEntry(std::string("query"), std::string("")) << std::endl; out << qAlignStr << std::endl; out << CreateEndEntry(std::string("query")) << std::endl; out << 
CreateStartEntry(std::string("target"), std::string("")) << std::endl; out << tAlignStr << std::endl; out << CreateEndEntry(std::string("target")) << std::endl; out << CreateEndEntry(std::string("alignment")) << std::endl; out << CreateEndEntry(std::string("hit")) << std::endl; }
void CompareSequencesOutput::Print(T_Alignment &alignment, T_QuerySequence &qseq, T_TargetSequence &tseq, std::ostream &out, bool refForward) { std::string queryStr, alignStr, textStr; CreateAlignmentStrings(alignment, qseq, tseq, textStr, alignStr, queryStr); if (refForward == false) { if (alignment.qStrand == 1 and alignment.tStrand == 0) { DNALength alignedSeqToEnd = 0; // DNALength alignedTSeqToEnd = 0; if (alignment.blocks.size() > 0) { // First compute the offset of the reverse of the substring that was aligned. alignedSeqToEnd = alignment.qLength - (alignment.qAlignedSeqPos + alignment.qAlignedSeq.length); DNALength alignEndToSubstrEnd = alignment.qAlignedSeq.length - (alignment.qPos + alignment.blocks[alignment.blocks.size()-1].qPos + alignment.blocks[alignment.blocks.size()-1].length); alignment.qPos = alignEndToSubstrEnd; } alignment.qAlignedSeqPos = alignedSeqToEnd; alignment.qStrand = 0; alignment.tStrand = 1; } } PrintCompareSequencesAlignmentStats(alignment, out); // change the spaces in the align string to *s for easy parsing of alignment VectorIndex i; for (i = 0; i < alignStr.size(); i++ ) { if (alignStr[i] == ' ') alignStr[i] = '*'; } if (refForward == false and alignment.tStrand == 1) { // // Build reverse complement strings. 
// std::string queryStrRC, alignStrRC, textStrRC; queryStrRC.resize(queryStr.size()); alignStrRC.resize(alignStr.size()); textStrRC.resize(alignStr.size()); DNALength pos; DNALength alignStringLength = alignStr.size(); for (pos = 0; pos < alignStringLength; pos++ ) { if (queryStr[pos] != '-') { queryStrRC[alignStringLength-pos-1] = ReverseComplementNuc[static_cast<int>(queryStr[pos])]; } else { queryStrRC[alignStringLength-pos-1] = '-'; } alignStrRC[alignStringLength-pos-1] = alignStr[pos]; if (textStr[pos] != '-') { textStrRC[alignStringLength-pos-1] = ReverseComplementNuc[static_cast<int>(textStr[pos])]; } else { textStrRC[alignStringLength-pos-1] = '-'; } } queryStr = queryStrRC; alignStr = alignStrRC; textStr = textStrRC; } // Headers of m5 format are: // qName qSeqLength qStart qEnd qStrand // tName tSeqLength tStart tEnd tStrand // score numMatch numMismatch numIns numDel // mapQV qAlignedSeq matchPattern tAlignedSeq out << queryStr << " " << alignStr << " " << textStr << std::endl; }