Esempio n. 1
0
// a file with color-coding from Ka/Ks values to color-bins
void kaks2Color(const Vdouble & kaksVec, const Vdouble &lowerBoundV,
				const sequence & refSeq, string fileName,codon *co) {
	vector<int> colors;
	int numOfSitesinAln = kaksVec.size();
	Vdouble negativesKaksVec,negativesSite;
	negativesKaksVec.clear();
	negativesSite.clear();
	int i,gapsInRefSeq=0;

	for (i=0;i<numOfSitesinAln;i++){
		if (codonUtility::aaOf(refSeq[i],*co) == -1) gapsInRefSeq++; 
	}

	// first dealing with positive selection
	colors.resize(numOfSitesinAln-gapsInRefSeq);
	int gap=0;
	for (i=0;i<numOfSitesinAln;i++){
		if (codonUtility::aaOf(refSeq[i],*co) == -1){
			gap++;
			continue;
		}
		if (lowerBoundV[i]>1) // color 1 (positive selection) : if confidence interval lower bound > 1
			colors[i-gap]=1;
		else if (kaksVec[i]>1) // color 2(positive selection) : "non-significant"
			colors[i-gap]=2;
		else  {
			negativesKaksVec.push_back(kaksVec[i]);  //add the value of kaks < 1
			negativesSite.push_back(i-gap);   //add the number of site of the kaks 
		}
	
	}

	// now dealing with purifying selection
	Vdouble orderVec = negativesKaksVec;
	if (orderVec.size()>0) // this is since once the whole protein was positive selection... (anomaly)
		sort(orderVec.begin(), orderVec.end());  //sort the kaks values to be divided to 5 groups
	MDOUBLE percentileNum = 5.0;
	int percentileNumInt = 5;
	Vdouble maxScoreForPercentile(percentileNumInt);
	if (orderVec.size()>0) {
		maxScoreForPercentile[0] = orderVec[0]; 
		for (int c = 1; c < percentileNumInt; ++c){
			int place = (int)((c / percentileNum) * negativesKaksVec.size());
			MDOUBLE maxScore = orderVec[place];
			maxScoreForPercentile[c] = maxScore;
		}
	}

	//loop over all the Ka/Ks < 1  
	for (int j=0; j < negativesKaksVec.size(); ++j){
			MDOUBLE r = negativesKaksVec[j]; //the kaks of the site.
			int s = (int)negativesSite[j];  //the  site.
			if (r > maxScoreForPercentile[4]) 
					colors[s] = 3;
			else if (r > maxScoreForPercentile[3]) 
					colors[s] = 4;
			else if (r> maxScoreForPercentile[2])
					colors[s] = 5;
			else if (r > maxScoreForPercentile[1])
					colors[s] = 6;
			else if (r >= maxScoreForPercentile[0])
					colors[s] = 7;
	}
	//print to file
	ofstream out(fileName.c_str());
	gap=0;
	amino aminoAcid;
	LOG(5,<<"Printing selection color bins to file"<<endl);
	for (i=0;i<refSeq.seqLen();i++){	 
		int aa = codonUtility::aaOf(refSeq[i], *co);
		if (aa==-1){
			gap++;
			continue;
		}
		string aaStr = aminoAcid.fromInt(aa);
		out<<i+1-gap <<"\t"<<aaStr<<"\t"<<colors[i-gap];
		out<<endl;
	}
	out.close();
}
Esempio n. 2
0
// Conversion constructor: builds a sequence holding the content of `other`
// re-encoded in the alphabet inAlph.
//
// The new sequence owns a clone of inAlph and copies the remark, name and id
// of `other`. The kind of conversion is chosen purely from the sizes of the
// two alphabets, tested in this order:
//   1. other is of size 20 or 4 (amino or nucleotide) and inAlph is of size 2
//      (indel): gaps become "-", everything else becomes "X".
//   2. other is of size 20 (resp. 4) and the size of inAlph is a multiple of
//      20 (resp. 4) (a mulAlphabet): every character is converted through its
//      string representation.
//   3. inAlph is of size 20 (resp. 4) and the size of other's alphabet is a
//      multiple of 20 (resp. 4): other's alphabet is treated as a mulAlphabet
//      and every character is mapped to its base-alphabet id.
// Any other combination is reported through errorMsg::reportError.
//
// other  - the sequence to convert.
// inAlph - the target alphabet; it is dereferenced unconditionally.
sequence::sequence(const sequence& other,const alphabet* inAlph)
: _alphabet(inAlph->clone()), _remark(other._remark), _name(other._name), _id(other._id)
{
	const int otherSize = other._alphabet->size();
	const int targetSize = inAlph->size();

	// if the other.alphabet is amino or nucleotide and the inAlph is indel
	if ((otherSize == 20 || otherSize == 4) && targetSize == 2)
	{
		for (int k=0; k < other.seqLen(); k += other._alphabet->stringSize())
		{
			const int charId = other._vec[k];
			if (charId == other._alphabet->gap())
				_vec.push_back(inAlph->fromChar("-",0));
			else
				// also converts "." (charId==-3) to "X";
				// unknown amino/nucleotide is converted to "X" and not to "?"
				_vec.push_back(inAlph->fromChar("X",0));
		}
	}
	// if the other.alphabet is amino or nucleotide and the inAlph is mulAlphabet
	else if ((otherSize == 20 && targetSize%20 == 0)
		|| (otherSize == 4 && targetSize%4 == 0))
	{
		for (int k=0; k < other.seqLen(); ++k)
		{
			const string ch = other._alphabet->fromInt(other._vec[k]);
			_vec.push_back(_alphabet->fromChar(ch,0));
		}
	}
	// if the other.alphabet is mulAlphabet and the inAlph is its baseAlphabet
	// (for example, if other.alphabet is a multiplied-amino and inAlph is amino,
	// then the converted sequence will have alphabet amino)
	else if ((targetSize == 20 && otherSize%20 == 0)
		|| (targetSize == 4 && otherSize%4 == 0))
	{
		// NOTE(review): this downcast is unchecked; the branch is selected by
		// size divisibility only (it is also reached for otherSize == 20 with
		// targetSize == 4). An earlier attempt to select it with dynamic_cast
		// was abandoned by the original author because it did not work.
		// Confirm that every alphabet reaching this point really is a mulAlphabet.
		const mulAlphabet* pMulAlphabet = static_cast<const mulAlphabet*>(other._alphabet);
		for (int k=0; k < other.seqLen(); ++k)
		{
			const int mulCharId = other._vec[k];
			_vec.push_back(pMulAlphabet->convertToBasedAlphaInt(mulCharId));
		}
	}
	// (currently, there are no implementations for other conversions)
	else
	{
		string error = "unable to convert this kind of alphabet";
		errorMsg::reportError(error);
	}
}