Exemple #1
0
// Converts a space separated quality string into a compressed quality string.
//
// qualities:     space separated quality values. Trailing spaces are removed
//                from this string in place, and its buffer is passed to Chomp.
// compQualities: resized to the number of columns produced by SplitString and
//                filled with one byte per non-empty column.
//
// NOTE: this function has horrible amounts of overhead, but lean and mean code that I had before
//       failed some of the unit tests.
void CRegexUtilities::ConvertQualities(string& qualities, CMosaikString& compQualities) {

	// strip trailing spaces; find_last_not_of copes with empty and all-space
	// input without dereferencing end() - 1 or reusing an iterator after erase()
	const string::size_type lastNonSpace = qualities.find_last_not_of(' ');
	if(lastNonSpace == string::npos) {
		qualities.clear();
	} else {
		qualities.erase(lastNonSpace + 1);
	}
	
	vector<string> columns;
	vector<string>::const_iterator sIter;

	// Chomp takes a mutable C string, so it is handed the string's own buffer
	char* pQualities = (char*)qualities.c_str();
	Chomp(pQualities);

	// split the quality string on single spaces
	back_insert_iterator<vector<string> > backiter(columns);
	SplitString(backiter, " ", pQualities);
	const unsigned int numQualities = (unsigned int)columns.size();

	// one output byte per column
	compQualities.Reserve(numQualities);
	compQualities.SetLength(numQualities);

	unsigned char* pCompQualities = (unsigned char*)compQualities.Data();

	// the output pointer advances for every column; empty columns are skipped,
	// so their output byte is not written here
	for(sIter = columns.begin(); sIter != columns.end(); ++sIter, ++pCompQualities) {
		if(sIter->empty()) continue;
		*pCompQualities = GetUnsignedChar((char*)sIter->c_str());
	}
}
// converts the supplied read from colorspace to pseudo-colorspace
//
// The read is rewritten in place, character by character:
//   '0' -> 'A', '1' -> 'C', '2' -> 'G', '3' -> 'T',
//   'X' is left unchanged, '-' -> 'N', '.' -> 'A'.
// Any other character prints an error message and terminates the program
// with exit code 1.
void CColorspaceUtilities::ConvertReadColorspaceToPseudoColorspace(CMosaikString& s) {
    char* pBases = s.Data();
    for(unsigned int i = 0; i < s.Length(); ++i, ++pBases) {
        switch(*pBases) {
        case '0':
            *pBases = 'A';
            break;
        case '1':
            *pBases = 'C';
            break;
        case '2':
            *pBases = 'G';
            break;
        case '3':
            *pBases = 'T';
            break;
        case 'X':
            break;
        case '-':
            *pBases = 'N';
            break;
        case '.':
            // here we pick an arbitrary colorspace transition, this will have at
            // least a 25 % chance of being correct as opposed to specifying an 'N'.
            *pBases = 'A';
            break;
        default:
            // pBases already points at the current character; indexing it with i
            // again would report (and read) the character at offset 2 * i
            printf("ERROR: Unrecognized nucleotide (%c) when converting read to pseudo-colorspace.\n", *pBases);
            exit(1);
            break;
        }
    }
}
// converts the supplied read from pseudo-colorspace to colorspace
// The function is used by the unaligned-read writer.
//
// The read is rewritten in place, character by character:
//   'A' -> '0', 'C' -> '1', 'G' -> '2', 'T' -> '3',
//   'X' and 'N' are left unchanged.
// Any other character prints an error message and terminates the program
// with exit code 1.
void CColorspaceUtilities::ConvertReadPseudoColorspaceToColorspace(CMosaikString& s) {
    char* pBases = s.Data();
    for(unsigned int i = 0; i < s.Length(); ++i, ++pBases) {
        switch(*pBases) {
        case 'A':
            *pBases = '0';
            break;
        case 'C':
            *pBases = '1';
            break;
        case 'G':
            *pBases = '2';
            break;
        case 'T':
            *pBases = '3';
            break;
        case 'X':
        case 'N':
            break;
        default:
            // pBases already points at the current character; indexing it with i
            // again would report (and read) the character at offset 2 * i
            printf("ERROR: Unrecognized nucleotide (%c) when converting read to colorspace.\n", *pBases);
            exit(1);
            break;
        }
    }
}
// converts the supplied read from basespace to pseudo-colorspace
//
// Each base is first passed through GetSimplifiedBase. Every adjacent pair of
// simplified bases is then looked up in mCSMap, and the mapped value overwrites
// the first base of the pair. Afterwards the last character is trimmed, so the
// read ends up one character shorter than it started.
// A pair without an entry in mCSMap prints an error message and terminates the
// program with exit code 1.
void CColorspaceUtilities::ConvertReadBasespaceToPseudoColorspace(CMosaikString& s) {

    // nothing to convert for an empty read; the code below reads the first
    // character and trims one character unconditionally, which presumably is
    // not safe on a zero-length string
    if(s.Length() == 0) return;

    char* pPrev   = s.Data();
    char* pString = pPrev + 1;

    // simplify various ambiguity codes
    *pPrev = GetSimplifiedBase(*pPrev);

    CS_MAP_t::const_iterator csIter;
    for(unsigned int i = 1; i < s.Length(); ++i, ++pString, ++pPrev) {

        // simplify various ambiguity codes
        *pString = GetSimplifiedBase(*pString);

        // look up the transition between the previous and the current base
        csIter = mCSMap.find(PACK_SHORT(*pPrev, *pString));
        if(csIter == mCSMap.end()) {
            printf("ERROR: Unknown combination found when converting to colorspace: [%c] & [%c]\n", *pPrev, *pString);
            exit(1);
        }

        // the previous base is no longer needed once its transition is known
        *pPrev = csIter->second;
    }

    // adjust the read
    s.TrimEnd(1);
}