Beispiel #1
0
void MSA::ListWeights() const
	{
	const unsigned uSeqCount = GetSeqCount();
	Log("Weights:\n");
	WEIGHT wTotal = 0;
	for (unsigned n = 0; n < uSeqCount; ++n)
		{
		wTotal += GetSeqWeight(n);
		Log("%6.3f %s\n", GetSeqWeight(n), GetSeqName(n));
		}
	Log("Total weights = %6.3f, should be 1.0\n", wTotal);
	}
Beispiel #2
0
void MSA::GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,
  FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,
  FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc,
  FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const
	{
	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();
	const char* seqName;
	memset(fcCounts, 0, g_AlphaSize*sizeof(FCOUNT));
	WEIGHT wTotal = 0;
	FCOUNT fGap = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const WEIGHT w = GetSeqWeight(uSeqIndex);
		if (IsGap(uSeqIndex, uColIndex))
			{
			fGap += w;
			continue;
			}
		else if (IsWildcard(uSeqIndex, uColIndex))
			{
			const unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
			switch (g_Alpha)
				{
			case ALPHA_Amino:
				switch (uLetter)
					{
				case AX_B:		// D or N
					fcCounts[AX_D] += w/2;
					fcCounts[AX_N] += w/2;
					break;
				case AX_Z:		// E or Q
					fcCounts[AX_E] += w/2;
					fcCounts[AX_Q] += w/2;
					break;
				default:		// any
					{
					const FCOUNT f = w/20;
					for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
						fcCounts[uLetter] += f;
					break;
					}
					}
				break;

			case ALPHA_DNA:
			case ALPHA_RNA:
				switch (uLetter)
					{
				case AX_R:	// G or A
					fcCounts[NX_G] += w/2;
					fcCounts[NX_A] += w/2;
					break;
				case AX_Y:	// C or T/U
					fcCounts[NX_C] += w/2;
					fcCounts[NX_T] += w/2;
					break;
				default:	// any
					const FCOUNT f = w/20;
					for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
						fcCounts[uLetter] += f;
					break;
					}
				break;

			default:
				Quit("Alphabet %d not supported", g_Alpha);
				}
			continue;
			}
		unsigned uLetter = GetLetter(uSeqIndex, uColIndex);

		//BEGIN MODIFICATIONS TO MUSCLE

		int original=0;
		for(unsigned i=0; i<uColIndex; i++){
			if (i >= this->GetColCount()){break;}
			++original;
			char c = GetChar(uSeqIndex, i);
			if(c== '-'){
				original--;
			}
		}
		seqName = this->GetSeqName(uSeqIndex);
		int compositeVectPosition;
		compositeVectPosition = atoi(seqName);
		CompositeVect CV = *CVLocation;

		Composite* CVL = CV[compositeVectPosition];
		Composite C = *CVL;
		
		for(int j=0; j<21; j++){
			fcCounts[j] = w*C[original][j];
			wTotal = w*C[original][j];
		}

		//ORIGINAL MUSLCE LINE WAS:
		//fcCounts[uLetter] += w;
		//wTotal += w;
		//END MODIFICATIONS TO MUSCLE

		}
	*ptrfOcc = (float) (1.0 - fGap);

	if (bNormalize && wTotal > 0)
		{
		if (wTotal > 1.001)
			Quit("wTotal=%g\n", wTotal);
		for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
			fcCounts[uLetter] /= wTotal;
//		AssertNormalized(fcCounts);
		}

	FCOUNT fcStartCount = 0;
	if (uColIndex == 0)
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex))
				fcStartCount += GetSeqWeight(uSeqIndex);
		}
	else
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex - 1))
				fcStartCount += GetSeqWeight(uSeqIndex);
		}

	FCOUNT fcEndCount = 0;
	if (uColCount - 1 == uColIndex)
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex))
				fcEndCount += GetSeqWeight(uSeqIndex);
		}
	else
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex + 1))
				fcEndCount += GetSeqWeight(uSeqIndex);
		}

	FCOUNT LL = 0;
	FCOUNT LG = 0;
	FCOUNT GL = 0;
	FCOUNT GG = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		WEIGHT w = GetSeqWeight(uSeqIndex);
		bool bLetterHere = !IsGap(uSeqIndex, uColIndex);
		bool bLetterPrev = (uColIndex == 0 || !IsGap(uSeqIndex, uColIndex - 1));
		if (bLetterHere)
			{
			if (bLetterPrev)
				LL += w;
			else
				GL += w;
			}
		else
			{
			if (bLetterPrev)
				LG += w;
			else
				GG += w;
			}
		}

	FCOUNT fcExtendCount = 0;
	if (uColIndex > 0 && uColIndex < GetColCount() - 1)
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && IsGap(uSeqIndex, uColIndex - 1) &&
			  IsGap(uSeqIndex, uColIndex + 1))
				fcExtendCount += GetSeqWeight(uSeqIndex);

	*ptrfcLL = LL;
	*ptrfcLG = LG;
	*ptrfcGL = GL;
	*ptrfcGG = GG;
	*ptrfcGapStart = fcStartCount;
	*ptrfcGapEnd = fcEndCount;
	*ptrfcGapExtend = fcExtendCount;
	}
Beispiel #3
0
void MSA::GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,
  FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,
  FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc,
  FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const
	{
	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();

	memset(fcCounts, 0, g_AlphaSize*sizeof(FCOUNT));
	WEIGHT wTotal = 0;
	FCOUNT fGap = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const WEIGHT w = GetSeqWeight(uSeqIndex);
		if (IsGap(uSeqIndex, uColIndex))
			{
			fGap += w;
			continue;
			}
		else if (IsWildcard(uSeqIndex, uColIndex))
			{
			const unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
			switch (g_Alpha)
				{
			case ALPHA_Amino:
				switch (uLetter)
					{
				case AX_B:		// D or N
					fcCounts[AX_D] += w/2;
					fcCounts[AX_N] += w/2;
					break;
				case AX_Z:		// E or Q
					fcCounts[AX_E] += w/2;
					fcCounts[AX_Q] += w/2;
					break;
				default:		// any
					{
					const FCOUNT f = w/20;
					for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
						fcCounts[uLetter] += f;
					break;
					}
					}
				break;

			case ALPHA_DNA:
			case ALPHA_RNA:
				switch (uLetter)
					{
				case AX_R:	// G or A
					fcCounts[NX_G] += w/2;
					fcCounts[NX_A] += w/2;
					break;
				case AX_Y:	// C or T/U
					fcCounts[NX_C] += w/2;
					fcCounts[NX_T] += w/2;
					break;
				default:	// any
					const FCOUNT f = w/20;
					for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
						fcCounts[uLetter] += f;
					break;
					}
				break;

			default:
				Quit("Alphabet %d not supported", g_Alpha);
				}
			continue;
			}
		unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
		fcCounts[uLetter] += w;
		wTotal += w;
		}
	*ptrfOcc = (float) (1.0 - fGap);

	if (bNormalize && wTotal > 0)
		{
		if (wTotal > 1.001)
			Quit("wTotal=%g\n", wTotal);
		for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
			fcCounts[uLetter] /= wTotal;
//		AssertNormalized(fcCounts);
		}

	FCOUNT fcStartCount = 0;
	if (uColIndex == 0)
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex))
				fcStartCount += GetSeqWeight(uSeqIndex);
		}
	else
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex - 1))
				fcStartCount += GetSeqWeight(uSeqIndex);
		}

	FCOUNT fcEndCount = 0;
	if (uColCount - 1 == uColIndex)
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex))
				fcEndCount += GetSeqWeight(uSeqIndex);
		}
	else
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex + 1))
				fcEndCount += GetSeqWeight(uSeqIndex);
		}

	FCOUNT LL = 0;
	FCOUNT LG = 0;
	FCOUNT GL = 0;
	FCOUNT GG = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		WEIGHT w = GetSeqWeight(uSeqIndex);
		bool bLetterHere = !IsGap(uSeqIndex, uColIndex);
		bool bLetterPrev = (uColIndex == 0 || !IsGap(uSeqIndex, uColIndex - 1));
		if (bLetterHere)
			{
			if (bLetterPrev)
				LL += w;
			else
				GL += w;
			}
		else
			{
			if (bLetterPrev)
				LG += w;
			else
				GG += w;
			}
		}

	FCOUNT fcExtendCount = 0;
	if (uColIndex > 0 && uColIndex < GetColCount() - 1)
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && IsGap(uSeqIndex, uColIndex - 1) &&
			  IsGap(uSeqIndex, uColIndex + 1))
				fcExtendCount += GetSeqWeight(uSeqIndex);

	*ptrfcLL = LL;
	*ptrfcLG = LG;
	*ptrfcGL = GL;
	*ptrfcGG = GG;
	*ptrfcGapStart = fcStartCount;
	*ptrfcGapEnd = fcEndCount;
	*ptrfcGapExtend = fcExtendCount;
	}