// converts a space separated quality string into a compressed quality string // NOTE: this function has horrible amounts of overhead, but lean and mean code that I had before // failed some of the unit tests. void CRegexUtilities::ConvertQualities(string& qualities, CMosaikString& compQualities) { string::iterator strIte = qualities.end() - 1; while ( *strIte == ' ' ) { qualities.erase( strIte ); strIte--; } vector<string> columns; vector<string>::const_iterator sIter; char* pQualities = (char*)qualities.c_str(); Chomp(pQualities); back_insert_iterator<vector<string> > backiter(columns); SplitString(backiter, " ", pQualities); const unsigned int numQualities = (unsigned int)columns.size(); compQualities.Reserve(numQualities); compQualities.SetLength(numQualities); unsigned char* pCompQualities = (unsigned char*)compQualities.Data(); for(sIter = columns.begin(); sIter != columns.end(); ++sIter, ++pCompQualities) { if(sIter->empty()) continue; *pCompQualities = GetUnsignedChar((char*)sIter->c_str()); } }
// converts the supplied read from colorspace to pseudo-colorspace void CColorspaceUtilities::ConvertReadColorspaceToPseudoColorspace(CMosaikString& s) { char* pBases = s.Data(); for(unsigned int i = 0; i < s.Length(); ++i, ++pBases) { switch(*pBases) { case '0': *pBases = 'A'; break; case '1': *pBases = 'C'; break; case '2': *pBases = 'G'; break; case '3': *pBases = 'T'; break; case 'X': break; case '-': *pBases = 'N'; break; case '.': // here we pick an arbitrary colorspace transition, this will have at // least 25 % of being correct as opposed to specifying an 'N'. *pBases = 'A'; break; default: printf("ERROR: Unrecognized nucleotide (%c) when converting read to pseudo-colorspace.\n", pBases[i]); exit(1); break; } } }
// converts the supplied read from pseudo-colorspace to colorspace // The function is used by the unaligned-read writer. void CColorspaceUtilities::ConvertReadPseudoColorspaceToColorspace(CMosaikString& s) { char* pBases = s.Data(); for(unsigned int i = 0; i < s.Length(); ++i, ++pBases) { switch(*pBases) { case 'A': *pBases = '0'; break; case 'C': *pBases = '1'; break; case 'G': *pBases = '2'; break; case 'T': *pBases = '3'; break; case 'X': case 'N': break; default: printf("ERROR: Unrecognized nucleotide (%c) when converting read to colorspace.\n", pBases[i]); exit(1); break; } } }
// converts the supplied read from basespace to pseudo-colorspace void CColorspaceUtilities::ConvertReadBasespaceToPseudoColorspace(CMosaikString& s) { char* pPrev = s.Data(); char* pString = pPrev + 1; // simplify various ambiguity codes *pPrev = GetSimplifiedBase(*pPrev); CS_MAP_t::const_iterator csIter; for(unsigned int i = 1; i < s.Length(); ++i, ++pString, ++pPrev) { // simplify various ambiguity codes *pString = GetSimplifiedBase(*pString); csIter = mCSMap.find(PACK_SHORT(*pPrev, *pString)); if(csIter == mCSMap.end()) { printf("ERROR: Unknown combination found when converting to colorspace: [%c] & [%c]\n", *pPrev, *pString); exit(1); } *pPrev = csIter->second; } // adjust the read s.TrimEnd(1); }