Пример #1
0
// The XP score is the sum of the score of each pair of
// sequences between two profiles which are aligned to
// each other. Notice that for two given profiles aligned
// in different ways, the difference in XP score must be
// the same as the difference in SP score because the
// score of a pair of sequences in one profile doesn't
// depend on the alignment.
SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)
	{
	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount1 != uColCount2)
		Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);

	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();

#if	TRACE
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif

	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;
			SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2);
			SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2);
			SCORE scorePair = scoreLetters + scoreGaps;
			scoreTotal += w1*w2*scorePair;
			++uPairCount;
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  >%s >%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  msa1.GetSeqName(uSeqIndex1),
			  msa2.GetSeqName(uSeqIndex2));
#endif
			}
		}
	if (0 == uPairCount)
		Quit("0 == uPairCount");

#if	TRACE
	Log("msa1=\n");
	msa1.LogMe();
	Log("msa2=\n");
	msa2.LogMe();
	Log("XP=%g\n", scoreTotal);
#endif
//	return scoreTotal / uPairCount;
	return scoreTotal;
	}
Пример #2
0
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
#if	TRACE
	msa.LogMe();
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif
	SCORE TotalLetters = 0;
	SCORE TotalGaps = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;
			SCORE Letters;
			SCORE Gaps;
			SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2,
			  &Letters, &Gaps);
			scoreTotal += w1*w2*scorePair;
			TotalLetters += w1*w2*Letters;
			TotalGaps += w1*w2*Gaps;
			++uPairCount;
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  %d=%s %d=%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  uSeqIndex1,
			  msa.GetSeqName(uSeqIndex1),
			  uSeqIndex2,
			  msa.GetSeqName(uSeqIndex2));
#endif
			}
		}
	*ptrLetters = TotalLetters;
	*ptrGaps = TotalGaps;
	return scoreTotal;
	}
Пример #3
0
// Objective score defined as the sum of profile-sequence
// scores for each sequence in the alignment. The profile
// is computed from the entire alignment, so this includes
// the score of each sequence against itself. This is to
// avoid recomputing the profile each time, so we reduce
// complexity but introduce a questionable approximation.
// The goal is to see if we can exploit the apparent
// improvement in performance of log-expectation score
// over the usual sum-of-pairs by optimizing this
// objective score in the iterative refinement stage.
SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])
	{
	if (g_PPScore != PPSCORE_LE)
		Quit("FastScoreMSA_LASimple: LA");

	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();

	const ProfPos *Prof = ProfileFromMSA(msa);

	if (0 != MatchScore)
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			MatchScore[uColIndex] = 0;

	SCORE scoreTotal = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex);
		SCORE scoreSeq = 0;
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const ProfPos &PP = Prof[uColIndex];
			if (msa.IsGap(uSeqIndex, uColIndex))
				{
				bool bOpen = (0 == uColIndex ||
				  !msa.IsGap(uSeqIndex, uColIndex - 1));
				bool bClose = (uColCount - 1 == uColIndex ||
				  !msa.IsGap(uSeqIndex, uColIndex + 1));

				if (bOpen)
					scoreSeq += PP.m_scoreGapOpen;
				if (bClose)
					scoreSeq += PP.m_scoreGapClose;
				//if (!bOpen && !bClose)
				//	scoreSeq += PP.m_scoreGapExtend;
				}
			else if (msa.IsWildcard(uSeqIndex, uColIndex))
				continue;
			else
				{
				unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex);
				const SCORE scoreMatch = PP.m_AAScores[uLetter];
				if (0 != MatchScore)
					MatchScore[uColIndex] += weightSeq*scoreMatch;
				scoreSeq += scoreMatch;
				}
			}
		scoreTotal += weightSeq*scoreSeq;
		}

	delete[] Prof;
	return scoreTotal;
	}
Пример #4
0
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])
	{
#if	TRACE
	Log("==================ObjScoreSP==============\n");
	Log("msa=\n");
	msa.LogMe();
#endif
	g_SPScoreLetters = 0;
	g_SPScoreGaps = 0;

	if (0 != MatchScore)
		{
		const unsigned uColCount = msa.GetColCount();
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			MatchScore[uColIndex] = 0;
		}

	const unsigned uSeqCount = msa.GetSeqCount();
	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
#if	TRACE
	Log("Seq1  Seq2     wt1     wt2    Letters         Gaps  Unwt.Score    Wt.Score       Total\n");
	Log("----  ----  ------  ------  ----------  ----------  ----------  ----------  ----------\n");
#endif
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;

			SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);
			SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);
			SCORE scorePair = scoreLetters + scoreGaps;
			++uPairCount;

			scoreTotal += w*scorePair;

			g_SPScoreLetters += w*scoreLetters;
			g_SPScoreGaps += w*scoreGaps;
#if	TRACE
			Log("%4d  %4d  %6.3f  %6.3f  %10.2f  %10.2f  %10.2f  %10.2f  %10.2f >%s >%s\n",
			  uSeqIndex1,
			  uSeqIndex2,
			  w1,
			  w2,
			  scoreLetters,
			  scoreGaps,
			  scorePair,
			  scorePair*w1*w2,
			  scoreTotal,
			  msa.GetSeqName(uSeqIndex1),
			  msa.GetSeqName(uSeqIndex2));
#endif
			}
		}
#if	TEST_SPFAST
	{
	SCORE f = ObjScoreSPFast(msa);
	Log("Fast  = %.6g\n", f);
	Log("Brute = %.6g\n", scoreTotal);
	if (BTEq(f, scoreTotal))
		Log("Agree\n");
	else
		Log("** DISAGREE **\n");
	}
#endif
//	return scoreTotal / uPairCount;
	return scoreTotal;
	}
Пример #5
0
static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex)
	{
    MuscleContext *ctx = getMuscleContext();
	SCOREMATRIX &Mx = *ctx->params.g_ptrScoreMatrix;
    unsigned &g_AlphaSize = ctx->alpha.g_AlphaSize;

	const unsigned uSeqCount = msa.GetSeqCount();

#if	BRUTE_LETTERS
	SCORE BruteScore = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);
		if (uLetter1 >= g_AlphaSize)
			continue;
		WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);
			if (uLetter2 >= g_AlphaSize)
				continue;
			WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			BruteScore += w1*w2*Mx[uLetter1][uLetter2];
			}
		}
#endif
	
	double N = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		WEIGHT w = msa.GetSeqWeight(uSeqIndex1);
		N += w;
		}
	if (N <= 0)
		return 0;

	FCOUNT Freqs[20];
	memset(Freqs, 0, sizeof(Freqs));
	SCORE Score = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		unsigned uLetter = msa.GetLetterEx(uSeqIndex1, uColIndex);
		if (uLetter >= g_AlphaSize)
			continue;
		WEIGHT w = msa.GetSeqWeight(uSeqIndex1);
		Freqs[uLetter] += w;
		Score -= w*w*Mx[uLetter][uLetter];
		}

	for (unsigned uLetter1 = 0; uLetter1 < g_AlphaSize; ++uLetter1)
		{
		const FCOUNT f1 = Freqs[uLetter1];
		Score += f1*f1*Mx[uLetter1][uLetter1];
		for (unsigned uLetter2 = uLetter1 + 1; uLetter2 < g_AlphaSize; ++uLetter2)
			{
			const FCOUNT f2 = Freqs[uLetter2];
			Score += 2*f1*f2*Mx[uLetter1][uLetter2];
			}
		}
	Score /= 2;
#if	BRUTE_LETTERS
	assert(BTEq(BruteScore, Score));
#endif
	return Score;
	}
Пример #6
0
SCORE ScoreGaps(const MSA &msa, const unsigned DiffCols[], unsigned DiffColCount)
	{
    MuscleContext *ctx = getMuscleContext();
    unsigned &g_ColCount = ctx->scoregaps.g_ColCount;
    unsigned &g_MaxSeqCount = ctx->scoregaps.g_MaxSeqCount;
    unsigned &g_MaxColCount = ctx->scoregaps.g_MaxColCount;
    GAPINFO** &g_Gaps = ctx->scoregaps.g_Gaps;
    bool* &g_ColDiff = ctx->scoregaps.g_ColDiff;
    
#if	TRACE
	{
	Log("ScoreGaps\n");
	Log("DiffCols ");
	for (unsigned i = 0; i < DiffColCount; ++i)
		Log(" %u", DiffCols[i]);
	Log("\n");
	Log("msa=\n");
	msa.LogMe();
	Log("\n");
	}
#endif
	const unsigned SeqCount = msa.GetSeqCount();
	const unsigned ColCount = msa.GetColCount();
	g_ColCount = ColCount;

	if (SeqCount > g_MaxSeqCount)
		{
		delete[] g_Gaps;
		g_MaxSeqCount = SeqCount + 256;
		g_Gaps = new GAPINFO *[g_MaxSeqCount];
		}
	memset(g_Gaps, 0, SeqCount*sizeof(GAPINFO *));

	if (ColCount > g_MaxColCount)
		{
		delete[] g_ColDiff;
		g_MaxColCount = ColCount + 256;
		g_ColDiff = new bool[g_MaxColCount];
		}

	memset(g_ColDiff, 0, g_ColCount*sizeof(bool));
	for (unsigned i = 0; i < DiffColCount; ++i)
		{
		unsigned Col = DiffCols[i];
		assert(Col < ColCount);
		g_ColDiff[Col] = true;
		}

	for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
		FindIntersectingGaps(msa, SeqIndex);

#if	TRACE
	{
	Log("\n");
	Log("Intersecting gaps:\n");
	Log("      ");
	for (unsigned Col = 0; Col < ColCount; ++Col)
		Log("%c", g_ColDiff[Col] ? '*' : ' ');
	Log("\n");
	Log("      ");
	for (unsigned Col = 0; Col < ColCount; ++Col)
		Log("%d", Col%10);
	Log("\n");
	for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
		{
		Log("%3d:  ", Seq);
		for (unsigned Col = 0; Col < ColCount; ++Col)
			Log("%c", msa.GetChar(Seq, Col));
		Log("  :: ");
		for (GAPINFO *GI = g_Gaps[Seq]; GI; GI = GI->Next)
			Log(" (%d,%d)", GI->Start, GI->End);
		Log("  >%s\n", msa.GetSeqName(Seq));
		}
	Log("\n");
	}
#endif

	SCORE Score = 0;
	for (unsigned Seq1 = 0; Seq1 < SeqCount; ++Seq1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(Seq1);
		for (unsigned Seq2 = Seq1 + 1; Seq2 < SeqCount; ++Seq2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(Seq2);
//			const SCORE Pair = ScorePair(Seq1, Seq2);
			const SCORE Pair = ScoreSeqPairGaps(msa, Seq1, msa, Seq2);
			Score += w1*w2*Pair;
#if	TRACE
			Log("Seq1=%u Seq2=%u ScorePair=%.4g w1=%.4g w2=%.4g Sum=%.4g\n",
			  Seq1, Seq2, Pair, w1, w2, Score);
#endif
			}
		}

	return Score;
	}
Пример #7
0
// this is a version of the profile x profile score that computes
// a per-site score suitable for use with anchoring heuristics
SCORE LetterObjScoreXP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])
	{
	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount1 != uColCount2)
		Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);

	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();

#if	TRACE
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif

	SCORE* mmScore = NULL;
	SCORE* ggScore = NULL;
	if( MatchScore != NULL )
		{
		mmScore = new SCORE[uColCount1];
		ggScore = new SCORE[uColCount1];
		memset( MatchScore, 0, sizeof(SCORE)*uColCount1 );
		}

	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
			{
			if( mmScore != NULL )
				memset( mmScore, 0, sizeof(SCORE)*uColCount1 );
			if( ggScore != NULL )
				memset( ggScore, 0, sizeof(SCORE)*uColCount1 );
			const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;
			SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2, mmScore);
			SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2, ggScore);
			SCORE scorePair = scoreLetters + scoreGaps;
			scoreTotal += w*scorePair;
			++uPairCount;
			if( MatchScore != NULL )
				for( unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex )
					MatchScore[uColIndex] += w*(mmScore[uColIndex]+ggScore[uColIndex]);
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  >%s >%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  msa1.GetSeqName(uSeqIndex1),
			  msa2.GetSeqName(uSeqIndex2));
#endif
			}
		}
	if (0 == uPairCount)
		Quit("0 == uPairCount");

#if	TRACE
	Log("msa1=\n");
	msa1.LogMe();
	Log("msa2=\n");
	msa2.LogMe();
	Log("XP=%g\n", scoreTotal);
#endif
//	return scoreTotal / uPairCount;
	if(	mmScore != NULL )
		delete[] mmScore;
	if( ggScore != NULL )
		delete[] ggScore;

	return scoreTotal;
	}