// converts a space separated quality string into a compressed quality string // NOTE: this function has horrible amounts of overhead, but lean and mean code that I had before // failed some of the unit tests. void CRegexUtilities::ConvertQualities(string& qualities, CMosaikString& compQualities) { string::iterator strIte = qualities.end() - 1; while ( *strIte == ' ' ) { qualities.erase( strIte ); strIte--; } vector<string> columns; vector<string>::const_iterator sIter; char* pQualities = (char*)qualities.c_str(); Chomp(pQualities); back_insert_iterator<vector<string> > backiter(columns); SplitString(backiter, " ", pQualities); const unsigned int numQualities = (unsigned int)columns.size(); compQualities.Reserve(numQualities); compQualities.SetLength(numQualities); unsigned char* pCompQualities = (unsigned char*)compQualities.Data(); for(sIter = columns.begin(); sIter != columns.end(); ++sIter, ++pCompQualities) { if(sIter->empty()) continue; *pCompQualities = GetUnsignedChar((char*)sIter->c_str()); } }
// converts the supplied read from pseudo-colorspace to colorspace // The function is used by the unaligned-read writer. void CColorspaceUtilities::ConvertReadPseudoColorspaceToColorspace(CMosaikString& s) { char* pBases = s.Data(); for(unsigned int i = 0; i < s.Length(); ++i, ++pBases) { switch(*pBases) { case 'A': *pBases = '0'; break; case 'C': *pBases = '1'; break; case 'G': *pBases = '2'; break; case 'T': *pBases = '3'; break; case 'X': case 'N': break; default: printf("ERROR: Unrecognized nucleotide (%c) when converting read to colorspace.\n", pBases[i]); exit(1); break; } } }
// converts the supplied read from colorspace to pseudo-colorspace void CColorspaceUtilities::ConvertReadColorspaceToPseudoColorspace(CMosaikString& s) { char* pBases = s.Data(); for(unsigned int i = 0; i < s.Length(); ++i, ++pBases) { switch(*pBases) { case '0': *pBases = 'A'; break; case '1': *pBases = 'C'; break; case '2': *pBases = 'G'; break; case '3': *pBases = 'T'; break; case 'X': break; case '-': *pBases = 'N'; break; case '.': // here we pick an arbitrary colorspace transition, this will have at // least 25 % of being correct as opposed to specifying an 'N'. *pBases = 'A'; break; default: printf("ERROR: Unrecognized nucleotide (%c) when converting read to pseudo-colorspace.\n", pBases[i]); exit(1); break; } } }
// converts the supplied read from basespace to pseudo-colorspace void CColorspaceUtilities::ConvertReadBasespaceToPseudoColorspace(CMosaikString& s) { char* pPrev = s.Data(); char* pString = pPrev + 1; // simplify various ambiguity codes *pPrev = GetSimplifiedBase(*pPrev); CS_MAP_t::const_iterator csIter; for(unsigned int i = 1; i < s.Length(); ++i, ++pString, ++pPrev) { // simplify various ambiguity codes *pString = GetSimplifiedBase(*pString); csIter = mCSMap.find(PACK_SHORT(*pPrev, *pString)); if(csIter == mCSMap.end()) { printf("ERROR: Unknown combination found when converting to colorspace: [%c] & [%c]\n", *pPrev, *pString); exit(1); } *pPrev = csIter->second; } // adjust the read s.TrimEnd(1); }
// encodes the supplied query sequence into 4-bit notation void CBamWriter::EncodeQuerySequence(const CMosaikString& query, string& encodedQuery) { // prepare the encoded query string const unsigned int queryLen = query.Length(); const unsigned int encodedQueryLen = (unsigned int)((queryLen / 2.0) + 0.5); encodedQuery.resize(encodedQueryLen); char* pEncodedQuery = (char*)encodedQuery.data(); const char* pQuery = (const char*)query.CData(); unsigned char nucleotideCode; bool useHighWord = true; while(*pQuery) { switch(*pQuery) { case '=': nucleotideCode = 0; break; case 'A': nucleotideCode = 1; break; case 'C': nucleotideCode = 2; break; case 'G': nucleotideCode = 4; break; case 'T': nucleotideCode = 8; break; case 'N': nucleotideCode = 15; break; default: printf("ERROR: Only the following bases are supported in the BAM format: {=, A, C, G, T, N}. Found [%c]\n", *pQuery); exit(1); } // pack the nucleotide code if(useHighWord) { *pEncodedQuery = nucleotideCode << 4; useHighWord = false; } else { *pEncodedQuery |= nucleotideCode; ++pEncodedQuery; useHighWord = true; } // increment the query position ++pQuery; } }
// extracts the genome assembly ID from a FASTA/FASTQ header void CRegexUtilities::ExtractGenomeAssemblyID(const string& line, CMosaikString& genomeAssemblyID) { #ifdef WIN32 cmatch results; if(!regex_search(line.c_str(), results, mGenomeAssemblyIDRegex)) { genomeAssemblyID.SetLength(0); return; } genomeAssemblyID = results[1].str().c_str(); #else // TODO: replace this with the TR1 regex above when it finally works in gcc. It doesn't work in gcc 4.3.3 // find the GA tag const string gaTag = "GA("; string::size_type gaPos = line.find(gaTag.c_str()); if(gaPos == string::npos) { genomeAssemblyID.SetLength(0); return; } // find the matching end parenthesis const unsigned int start = gaPos + gaTag.size(); unsigned int stop = start; const char* pBuffer = line.data(); unsigned int lineLen = line.size(); if(stop < lineLen) { while(pBuffer[stop] != ')') { stop++; if(stop == lineLen) break; } } if(start == stop) { cout << "ERROR: could not parse genome assembly ID from FASTA header." << endl; cout << " " << line << endl; exit(1); } genomeAssemblyID = line.substr(start, stop - start).c_str(); #endif }
// load the read header from disk void CAlignmentReader::LoadReadHeader( CMosaikString& readName, unsigned int& readGroupCode, unsigned char& readStatus, int& numMate1Alignments, int& numMate2Alignments, int& numMate1OriginalAlignments, int& numMate2OriginalAlignments, int& numMate1Hashes, int& numMate2Hashes) { // get the read name const unsigned char readNameLength = (unsigned char)*mBufferPtr; ++mBufferPtr; readName.Copy((const char*)mBufferPtr, readNameLength); mBufferPtr += readNameLength; // get the read group code memcpy((char*)&readGroupCode, mBufferPtr, SIZEOF_INT); mBufferPtr += SIZEOF_INT; // get the read status readStatus = (unsigned char)*mBufferPtr; ++mBufferPtr; const bool haveMate1 = ((readStatus & RF_HAVE_MATE1) != 0 ? true : false); const bool haveMate2 = ((readStatus & RF_HAVE_MATE2) != 0 ? true : false); // get the number of mate 1 alignments if(haveMate1) { memcpy((char*)&numMate1Alignments, mBufferPtr, SIZEOF_INT); mBufferPtr += SIZEOF_INT; memcpy((char*)&numMate1OriginalAlignments, mBufferPtr, SIZEOF_INT); mBufferPtr += SIZEOF_INT; memcpy((char*)&numMate1Hashes, mBufferPtr, SIZEOF_INT); mBufferPtr += SIZEOF_INT; } // get the number of mate 2 alignments if(haveMate2) { memcpy((char*)&numMate2Alignments, mBufferPtr, SIZEOF_INT); mBufferPtr += SIZEOF_INT; memcpy((char*)&numMate2OriginalAlignments, mBufferPtr, SIZEOF_INT); mBufferPtr += SIZEOF_INT; memcpy((char*)&numMate2Hashes, mBufferPtr, SIZEOF_INT); mBufferPtr += SIZEOF_INT; } }
// saves the alignment to the alignment archive void CBamWriter::SaveAlignment( const Alignment& al, const char* zaString, const bool& noCigarMdNm, const bool& notShowRnamePos, const bool& isSolid, const bool& processedBamData, const bool& report_zn) { // ================= // set the BAM flags // ================= // define our flags unsigned int flag = 0; int insertSize = 0; if(al.IsPairedEnd) { flag |= BAM_SEQUENCED_AS_PAIRS; // first or second mate? flag |= (al.IsFirstMate ? BAM_QUERY_FIRST_MATE : BAM_QUERY_SECOND_MATE); if(al.IsResolvedAsPair) { if ( al.IsResolvedAsProperPair ) flag |= BAM_PROPER_PAIR; if(al.IsMateMapped && al.IsMateReverseStrand) flag |= BAM_MATE_REVERSE_COMPLEMENT; insertSize = al.FragmentLength; } if ( !al.IsMapped ) flag |= BAM_QUERY_UNMAPPED; if ( !al.IsMateMapped ) flag |= BAM_MATE_UNMAPPED; } else { if ( !al.IsMapped ) flag |= BAM_QUERY_UNMAPPED; } if(al.IsMapped && al.IsReverseStrand) flag |= BAM_QUERY_REVERSE_COMPLEMENT; // ========================== // construct the cigar string // ========================== string packedCigar; unsigned short numCigarOperations = 0; if ( !noCigarMdNm ) { if ( !processedBamData ) CreatePackedCigar( al, packedCigar, numCigarOperations, isSolid ); else { packedCigar = al.PackedCigar; numCigarOperations = al.NumCigarOperation; } } else packedCigar = "\0"; const unsigned int packedCigarLen = !noCigarMdNm ? packedCigar.size() : 0; // =================== // write the alignment // =================== // remove the gaps from the read CMosaikString query; if ( !processedBamData ) { query = al.Query.CData(); query.Remove('-'); } // initialize const unsigned int nameLen = al.Name.Length() + 1; const unsigned int queryLen = processedBamData ? al.QueryLength : query.Length(); // sanity check //al.BaseQualities.CheckQuality(); //if ( queryLen != alIter->BaseQualities.Length() ) { // printf("ERROR: The lengths of bases(%u) and qualities(%u) of Read (%s) didn't match.\n", queryLen, alIter->BaseQualities.Length(), readName.CData()); // exit(1); //} // encode the query string encodedQuery; if (!processedBamData && (query.Length() != 0)) EncodeQuerySequence(query, encodedQuery); else encodedQuery = al.EncodedQuery; const unsigned int encodedQueryLen = encodedQuery.size(); // create our read group tag string readGroupTag; const unsigned int readGroupTagLen = 3 + al.ReadGroup.size() + 1; readGroupTag.resize(readGroupTagLen); char* pReadGroupTag = (char*)readGroupTag.data(); sprintf(pReadGroupTag, "RGZ%s", al.ReadGroup.c_str()); // create our mismatch tag string mismatchTag; unsigned int numMismatches = 0; unsigned int nmTagLen = 0; if ( !noCigarMdNm ) { mismatchTag = "NMi"; mismatchTag.resize(MISMATCH_TAG_LEN); nmTagLen = MISMATCH_TAG_LEN; numMismatches = al.NumMismatches; memcpy((char*)mismatchTag.data() + 3, (char*)&numMismatches, SIZEOF_INT); } // create our MD tag string mdTag; char* pMd = 0; unsigned int mdTagLen = 0; char* pMdTag; if ( !noCigarMdNm ) { if ( !processedBamData ) pMd = (char*) mdTager.GetMdTag( al.Reference.CData(), al.Query.CData(), al.Reference.Length() ); else pMd = (char*) al.MdString.c_str(); mdTagLen = 3 + strlen( pMd ) + 1; mdTag.resize( mdTagLen ); pMdTag = (char*)mdTag.data(); sprintf(pMdTag, "MDZ%s", pMd); #ifdef VERBOSE_DEBUG fprintf(stderr, "=== MD ===\n"); fprintf(stderr, "%s\n%s\n%s\n", al.Reference.CData(), al.Query.CData(), mdTag.c_str()); #endif } // create our za tag unsigned int zaTagLen = 0; string zaTag; char* pZaTag; if ((zaString != 0) && (zaString != (char)0)) { zaTagLen = 3 + strlen( zaString ) + 1; zaTag.resize( zaTagLen ); pZaTag = (char*)zaTag.data(); sprintf(pZaTag, "ZAZ%s",zaString); } // create our zn tag unsigned int znTagLen = 0; string znTag; if (report_zn){ ostringstream zn_buffer; zn_buffer << "ZNZ" << al.SwScore << ";" << al.NextSwScore << ";" << al.NumLongestMatchs << ";" << al.Entropy << ";" << al.NumMapped << ";" << al.NumHash; znTag = zn_buffer.str(); znTagLen = znTag.size() + 1; //cerr << znTag.data() << "\t" << znTag << "\t" << znTagLen << endl; } // create our cs tag unsigned int csTagLen = 0; string csTag; char* pCsTag; if (isSolid) { csTagLen = 3 + strlen ( al.CsQuery.c_str() ) + 1; csTag.resize( csTagLen ); pCsTag = (char*)csTag.data(); sprintf( pCsTag, "CSZ%s", al.CsQuery.c_str() ); } // create our cq tag unsigned int cqTagLen = 0; string cqTag; char* pCqTag; if (isSolid) { cqTagLen = 3 + strlen ( al.CsBaseQualities.c_str() ) + 1; cqTag.resize( cqTagLen ); pCqTag = (char*)cqTag.data(); sprintf( pCqTag, "CQZ%s", al.CsBaseQualities.c_str() ); } // retrieve our bin unsigned int bin = CalculateMinimumBin(al.ReferenceBegin, al.ReferenceEnd); // assign the BAM core data unsigned int buffer[8] = {0}; unsigned int reference_index, reference_pos; if (!al.IsMapped) { if (al.IsMateMapped) reference_index = al.MateReferenceIndex; else reference_index = 0xffffffff; } else { reference_index = al.ReferenceIndex; } if (!al.IsMapped) { if (al.IsMateMapped) reference_pos = al.MateReferenceBegin; else reference_pos = 0xffffffff; } else { reference_pos = al.ReferenceBegin; } buffer[0] = (notShowRnamePos) ? 0xffffffff : reference_index; buffer[1] = (notShowRnamePos) ? 0xffffffff : reference_pos; buffer[2] = (bin << 16) | (al.RecalibratedQuality << 8) | nameLen; buffer[3] = (flag << 16) | numCigarOperations; buffer[4] = queryLen; if(al.IsPairedEnd) { if (notShowRnamePos) { buffer[5] = 0xffffffff; buffer[6] = 0xffffffff; } else { if (!al.IsMateMapped) {//unmapped mate buffer[5] = reference_index; buffer[6] = reference_pos; } else { buffer[5] = al.MateReferenceIndex; buffer[6] = al.MateReferenceBegin; } } buffer[7] = insertSize; } else { buffer[5] = 0xffffffff; buffer[6] = 0xffffffff; buffer[7] = 0; } // write the block size const unsigned int dataBlockSize = nameLen + packedCigarLen + encodedQueryLen + queryLen + readGroupTagLen + nmTagLen + mdTagLen + zaTagLen + znTagLen + csTagLen + cqTagLen; const unsigned int blockSize = BAM_CORE_SIZE + dataBlockSize; BgzfWrite((char*)&blockSize, SIZEOF_INT); // write the BAM core BgzfWrite((char*)&buffer, BAM_CORE_SIZE); // write the query name BgzfWrite(al.Name.CData(), nameLen); // write the packed cigar BgzfWrite(packedCigar.data(), packedCigarLen); // write the encoded query sequence BgzfWrite(encodedQuery.data(), encodedQueryLen); // write the base qualities BgzfWrite(al.BaseQualities.CData(), queryLen); // write the read group tag BgzfWrite(readGroupTag.data(), readGroupTagLen); // write the mismatch tag if ( !noCigarMdNm ) BgzfWrite(mismatchTag.data(), MISMATCH_TAG_LEN); // write the MD tag if ( !noCigarMdNm ) BgzfWrite(mdTag.data(), mdTagLen); // write the ZA tag if ( zaString != 0 && (zaString != (char)0)) BgzfWrite(zaTag.data(), zaTagLen); // write the ZN tag if (report_zn && (znTagLen > 0)) BgzfWrite(znTag.data(), znTagLen); // write the cs tag if (isSolid) BgzfWrite(csTag.data(), csTagLen); // write the cq tag if (isSolid) BgzfWrite(cqTag.data(), cqTagLen); }
// converts the supplied alignment from colorspace to basespace void CColorspaceUtilities::ConvertAlignmentToBasespace(Alignment& al) { // convert the alignment to character arrays const unsigned int pairwiseLen = al.Reference.Length(); //char* pReference = al.Reference.Data(); //char* pQuery = al.Query.Data(); // re-allocate mBsRef & mBsQuery if the reversed space is insufficient if( pairwiseLen > mCsAl.csAlignmentLength ) { //if ( mCsAl.csReference ) delete [] mCsAl.csReference; //if ( mCsAl.csQuery ) delete [] mCsAl.csQuery; //if ( mCsAl.bsReference ) delete [] mCsAl.bsReference; //if ( mCsAl.bsQuery ) delete [] mCsAl.bsQuery; //if ( mCsAl.type ) delete [] mCsAl.type; //if ( mCsAl.dashReference ) delete [] mCsAl.dashReference; //if ( mCsAl.dashQuery ) delete [] mCsAl.dashQuery; //if ( mCsAl.mismatch ) delete [] mCsAl.mismatch; //if ( mCsAl.identical ) delete [] mCsAl.identical; delete [] mCsAl.csReference; delete [] mCsAl.csQuery; delete [] mCsAl.bsReference; delete [] mCsAl.bsQuery; delete [] mCsAl.type; delete [] mCsAl.dashReference; delete [] mCsAl.dashQuery; delete [] mCsAl.mismatch; delete [] mCsAl.identical; try { mCsAl.csAlignmentLength = pairwiseLen; mCsAl.csReference = new char [ pairwiseLen ]; mCsAl.csQuery = new char [ pairwiseLen ]; mCsAl.bsReference = new char [ pairwiseLen + 2 ]; mCsAl.bsQuery = new char [ pairwiseLen + 2 ]; mCsAl.type = new unsigned short [pairwiseLen]; mCsAl.dashReference = new RegionT [ pairwiseLen ]; mCsAl.dashQuery = new RegionT [ pairwiseLen ]; mCsAl.mismatch = new unsigned int [ pairwiseLen ]; mCsAl.identical = new RegionT [ pairwiseLen ]; } catch( bad_alloc ) { cout << "ERROR: Unable to allocate enough memory for converting colorspace ." << endl; exit(1); } } // initialize the counters mCsAl.nDashReference = 0; mCsAl.nDashQuery = 0; mCsAl.nMismatch = 0; mCsAl.nIdentical = 0; // convert cs to bs // initial the first BS base char bsBase = mpBsRefSeqs[al.ReferenceIndex][al.ReferenceBegin]; if ( bsBase == 'N' || bsBase == 'X' ) { cout << "ERROR: The first base of the colorspace-basespace converter is N or X." << endl; exit(1); } // copy CS alignments memcpy ( mCsAl.csReference, al.Reference.Data(), pairwiseLen ); memcpy ( mCsAl.csQuery, al.Query.Data(), pairwiseLen ); mCsAl.bsReference[0] = bsBase; mCsAl.bsQuery[0] = bsBase; ConvertCs2Bs(mCsAl.csReference, mCsAl.bsReference, 0, pairwiseLen-1, bsBase); ConvertCs2Bs(mCsAl.csQuery, mCsAl.bsQuery, 0, pairwiseLen-1, bsBase); // search the dash regions & mismatches bool continuedDReference = false; bool continuedDQuery = false; unsigned int nMatch = 0; BS_MAP_t::const_iterator bsIter; for ( unsigned int i = 0; i < pairwiseLen; i++ ) { // determine identical region const bool isEndIdentity = ( mCsAl.csQuery[i] != mCsAl.csReference[i] ) && ( nMatch >= mNAllowedMismatch ); if ( isEndIdentity ) { mCsAl.identical[ mCsAl.nIdentical ].Begin = i - nMatch; mCsAl.identical[ mCsAl.nIdentical ].Length = nMatch; mCsAl.nIdentical++; } if ( mCsAl.csQuery[i] == mCsAl.csReference[i]) nMatch++; else nMatch = 0; // determine mismatches bool isN = (mCsAl.csReference[i] != 'A') && (mCsAl.csReference[i] != 'C') && (mCsAl.csReference[i] != 'G') && (mCsAl.csReference[i] != 'T'); const bool isMismatch = (mCsAl.csReference[i] != '-') && (mCsAl.csQuery[i] != '-') && !isN && (mCsAl.csReference[i] != mCsAl.csQuery[i]); if ( isMismatch ) { // set the position mCsAl.mismatch[mCsAl.nMismatch] = i; mCsAl.nMismatch++; // set the mismatch flag mCsAl.type[i] = 0; } // for reference convertion if ( mCsAl.csReference[i] != '-' ) { // end the current dash region if ( continuedDReference ) { mCsAl.nDashReference++; // the current position could be a mismatch mCsAl.mismatch[mCsAl.nMismatch] = i; mCsAl.nMismatch++; mCsAl.type[i] = 1; } continuedDReference = false; } else { // start a dash region if ( !continuedDReference ) { mCsAl.dashReference[mCsAl.nDashReference].Begin = i; mCsAl.dashReference[mCsAl.nDashReference].Length = 0; // the preceding position could be a mismatch mCsAl.mismatch[mCsAl.nMismatch] = i; mCsAl.nMismatch++; mCsAl.type[i] = 3; } mCsAl.dashReference[mCsAl.nDashReference].Length++; continuedDReference = true; } // for query convertion if ( mCsAl.csQuery[i] != '-' ) { // end the current dash region if ( continuedDQuery ) { mCsAl.nDashQuery++; // the current position could be a mismatch mCsAl.mismatch[mCsAl.nMismatch] = i; mCsAl.nMismatch++; mCsAl.type[i] = 2; } continuedDQuery = false; } else { // start a dash region if ( !continuedDQuery ) { mCsAl.dashQuery[mCsAl.nDashQuery].Begin = i; mCsAl.dashQuery[mCsAl.nDashQuery].Length = 0; // the preceding position could be a mismatch mCsAl.mismatch[mCsAl.nMismatch] = i; mCsAl.nMismatch++; mCsAl.type[i] = 4; } mCsAl.dashQuery[mCsAl.nDashQuery].Length++; continuedDQuery = true; } } if ( nMatch > 0 ) { mCsAl.identical[ mCsAl.nIdentical ].Begin = pairwiseLen - nMatch; mCsAl.identical[ mCsAl.nIdentical ].Length = nMatch; mCsAl.nIdentical++; } if ( mCsAl.identical[mCsAl.nIdentical - 1].Begin != 0 ) { // find sequencing errors if ( mCsAl.nMismatch > 0 ) FindSequencingError(pairwiseLen); if ( mCsAl.nDashReference > 0 ) { for ( unsigned int i = 0; i < mCsAl.nDashReference; i++ ) { unsigned int curPosition = mCsAl.dashReference[i].Begin + mCsAl.dashReference[i].Length; if ( mCsAl.bsReference[ curPosition ] != mCsAl.bsQuery[ curPosition ] ) { curPosition = mCsAl.dashReference[i].Begin; for ( unsigned int j = 0; j < mCsAl.dashReference[i].Length; j++ ) mCsAl.bsQuery[ curPosition + j + 1 ] = 'N'; } } AdjustDash(mCsAl.csReference, mCsAl.csQuery, mCsAl.dashReference, mCsAl.nDashReference, mCsAl.bsReference); } if ( mCsAl.nDashQuery > 0 ) AdjustDash(mCsAl.csQuery, mCsAl.csReference, mCsAl.dashQuery, mCsAl.nDashQuery, mCsAl.bsQuery); // deal with the indentical region for ( unsigned int i = 0; i < mCsAl.nIdentical; i++ ) { unsigned int csEnd = mCsAl.identical[i].Begin + mCsAl.identical[i].Length - 1; unsigned int curPosition = mCsAl.identical[i].Begin; bsBase = mCsAl.bsReference[ curPosition ]; while ( ( bsBase == '-' ) || ( bsBase == 'N' ) ) { curPosition--; bsBase = mCsAl.bsReference[ curPosition ]; if ( curPosition == 0 ) break; } bool isGoodBsBase = false; if ( ( bsBase != '-' ) && ( bsBase != 'N' ) ) isGoodBsBase = true; if ( isGoodBsBase ) ConvertCs2Bs(mCsAl.csQuery, mCsAl.bsQuery, mCsAl.identical[i].Begin, csEnd, bsBase); } } // end up the sequences mCsAl.bsReference[ pairwiseLen + 1 ] = 0; mCsAl.bsQuery[ pairwiseLen + 1 ] = 0; al.Reference = mCsAl.bsReference; al.Query = mCsAl.bsQuery; ++al.ReferenceEnd; ++al.QueryEnd; al.QueryLength = al.QueryEnd - al.QueryBegin + 1; // ------------------------------------------------------------------------------------------ // convert the colorspace transition qualities to base qualities // NOTE: this algorithm will simply take the minimum of the two qualities that overlap a base // ------------------------------------------------------------------------------------------ const unsigned short numColorspaceQualities = al.BaseQualities.Length(); const unsigned short lastCSQIndex = numColorspaceQualities - 1; CMosaikString csQualities = al.BaseQualities; al.BaseQualities.Reserve(numColorspaceQualities + 1); al.BaseQualities.SetLength(numColorspaceQualities + 1); const char* pCSQual = csQualities.CData(); char* pBSQual = al.BaseQualities.Data(); // handle the first base quality *pBSQual = *pCSQual; ++pBSQual; // handle the internal base qualities for(unsigned short i = 1; i < numColorspaceQualities; ++i, ++pBSQual) *pBSQual = min(pCSQual[i - 1], pCSQual[i]); // handle the final base quality *pBSQual = pCSQual[lastCSQIndex]; // update the number of mismatches // TODO: This should be augmented to support IUPAC ambiguity codes const unsigned int bsPairwiseLen = al.Reference.Length(); al.NumMismatches = 0; for(unsigned short i = 0; i < bsPairwiseLen; ++i) { if(mCsAl.bsReference[i] != mCsAl.bsQuery[i]) al.NumMismatches++; } }