// a file with color-coding from Ka/Ks values to color-bins void kaks2Color(const Vdouble & kaksVec, const Vdouble &lowerBoundV, const sequence & refSeq, string fileName,codon *co) { vector<int> colors; int numOfSitesinAln = kaksVec.size(); Vdouble negativesKaksVec,negativesSite; negativesKaksVec.clear(); negativesSite.clear(); int i,gapsInRefSeq=0; for (i=0;i<numOfSitesinAln;i++){ if (codonUtility::aaOf(refSeq[i],*co) == -1) gapsInRefSeq++; } // first dealing with positive selection colors.resize(numOfSitesinAln-gapsInRefSeq); int gap=0; for (i=0;i<numOfSitesinAln;i++){ if (codonUtility::aaOf(refSeq[i],*co) == -1){ gap++; continue; } if (lowerBoundV[i]>1) // color 1 (positive selection) : if confidence interval lower bound > 1 colors[i-gap]=1; else if (kaksVec[i]>1) // color 2(positive selection) : "non-significant" colors[i-gap]=2; else { negativesKaksVec.push_back(kaksVec[i]); //add the value of kaks < 1 negativesSite.push_back(i-gap); //add the number of site of the kaks } } // now dealing with purifying selection Vdouble orderVec = negativesKaksVec; if (orderVec.size()>0) // this is since once the whole protein was positive selection... (anomaly) sort(orderVec.begin(), orderVec.end()); //sort the kaks values to be divided to 5 groups MDOUBLE percentileNum = 5.0; int percentileNumInt = 5; Vdouble maxScoreForPercentile(percentileNumInt); if (orderVec.size()>0) { maxScoreForPercentile[0] = orderVec[0]; for (int c = 1; c < percentileNumInt; ++c){ int place = (int)((c / percentileNum) * negativesKaksVec.size()); MDOUBLE maxScore = orderVec[place]; maxScoreForPercentile[c] = maxScore; } } //loop over all the Ka/Ks < 1 for (int j=0; j < negativesKaksVec.size(); ++j){ MDOUBLE r = negativesKaksVec[j]; //the kaks of the site. int s = (int)negativesSite[j]; //the site. if (r > maxScoreForPercentile[4]) colors[s] = 3; else if (r > maxScoreForPercentile[3]) colors[s] = 4; else if (r> maxScoreForPercentile[2]) colors[s] = 5; else if (r > maxScoreForPercentile[1]) colors[s] = 6; else if (r >= maxScoreForPercentile[0]) colors[s] = 7; } //print to file ofstream out(fileName.c_str()); gap=0; amino aminoAcid; LOG(5,<<"Printing selection color bins to file"<<endl); for (i=0;i<refSeq.seqLen();i++){ int aa = codonUtility::aaOf(refSeq[i], *co); if (aa==-1){ gap++; continue; } string aaStr = aminoAcid.fromInt(aa); out<<i+1-gap <<"\t"<<aaStr<<"\t"<<colors[i-gap]; out<<endl; } out.close(); }
// convert the other sequence to the alphabet inAlph. sequence::sequence(const sequence& other,const alphabet* inAlph) : _alphabet(inAlph->clone()), _remark(other._remark), _name(other._name), _id(other._id) { const mulAlphabet* pMulAlphabet; // if the other.alphabet is amino or nucleotide and the inAlph is indel if ( (other._alphabet->size() == 20 && inAlph->size() == 2) || (other._alphabet->size() == 4 && inAlph->size() == 2) ) { for (int k=0; k < other.seqLen() ;k += other._alphabet->stringSize()) { int charId = other._vec[k]; if (charId == other._alphabet->gap()) _vec.push_back(inAlph->fromChar("-",0)); else _vec.push_back(inAlph->fromChar("X",0)); //also converts "." (charId==-3) to "X" // unknown amino/nucleotide is converted to "X" and not to "?" } } // if the other.alphabet is amino or nucleotide and the inAlph is mulAlphabet else if ( (other._alphabet->size() == 20 && inAlph->size()%20 == 0) || (other._alphabet->size() == 4 && inAlph->size()%4 == 0) ) { for (int k=0; k < other.seqLen() ;++k) { int charId = other._vec[k]; string ch = other._alphabet->fromInt(charId); int mulCharId = _alphabet->fromChar(ch,0); _vec.push_back(mulCharId); } // debug OZ //cout << "other sequence: " << other << endl; //cout << "mul sequence " << (*this) << endl; // end of debug } // if the other.alphabet is mulAlphabet and the inAlph is it's baseAlphabet // (for example, if other.alphabet is a multiplied-amino and inAlph is amino, then the converted sequence // will have alphabet amino) else if ( ((inAlph->size() == 20) && (other._alphabet->size()%20 == 0)) || (inAlph->size() == 4) && (other._alphabet->size()%4 == 0)) { pMulAlphabet=(mulAlphabet*)(other._alphabet); for (int k=0; k < other.seqLen() ;++k) { int mulCharId = other._vec[k]; int baseId = pMulAlphabet->convertToBasedAlphaInt(mulCharId); _vec.push_back(baseId); } } // I tried to implement it using dynamic_cast but it doesn't work... /*else if ( (pMulAlphabet = dynamic_cast<const mulAlphabet*>(other._alphabet)) != NULL ) { if (pMulAlphabet->getBaseAlphabet()->size() == inAlph->size()) { for (int k=0; k < other.seqLen() ;++k) { int mulCharId = other._vec[k]; int baseId = pMulAlphabet->convertToBasedAlphaInt(mulCharId); _vec.push_back(baseId); } } }*/ // (currently, there is no implimentions for other converts) else { string error = "unable to convert this kind of alphabet"; errorMsg::reportError(error); } }