コード例 #1
0
ファイル: msa2.cpp プロジェクト: ggrekhov/ugene
// Return true if the given column has no gaps and all
// its residues are in the same biochemical group.
bool MSAColIsConservative(const MSA &msa, unsigned uColIndex)
	{
	extern unsigned ResidueGroup[];

	const unsigned uSeqCount = msa.GetColCount();
	if (0 == uSeqCount)
		Quit("MSAColIsConservative: empty alignment");

	if (msa.IsGap(0, uColIndex))
		return false;

	unsigned uLetter = msa.GetLetterEx(0, uColIndex);
    // cppcheck-suppress uninitvar
	const unsigned uGroup = ResidueGroup[uLetter];

	for (unsigned uSeqIndex = 1; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		if (msa.IsGap(uSeqIndex, uColIndex))
			return false;
		uLetter = msa.GetLetter(uSeqIndex, uColIndex);
		if (ResidueGroup[uLetter] != uGroup)
			return false;
		}
	return true;
	}
コード例 #2
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// Profile-sequence (PS) objective score.
//
// The score is the weighted sum, over every sequence in the alignment,
// of that sequence scored against a profile built from the whole
// alignment. Because the profile is built once from all sequences, each
// sequence is also scored against itself; this avoids rebuilding the
// profile per sequence at the cost of a questionable approximation.
// The aim is to test whether the log-expectation score, which appears
// to outperform plain sum-of-pairs, can be exploited as the objective
// in the iterative refinement stage.
//
//   msa         alignment to score
//   MatchScore  optional (may be 0) array with one slot per column; when
//               given it is zeroed and then receives the weighted letter
//               scores accumulated per column
//
// Returns the weighted total. Calls Quit() unless g_PPScore is PPSCORE_LE.
SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])
	{
	if (PPSCORE_LE != g_PPScore)
		Quit("FastScoreMSA_LASimple: LA");

	const unsigned nSeqs = msa.GetSeqCount();
	const unsigned nCols = msa.GetColCount();

	const ProfPos *ptrProfile = ProfileFromMSA(msa);

	const bool bPerColumn = (0 != MatchScore);
	if (bPerColumn)
		{
		unsigned c = 0;
		while (c < nCols)
			MatchScore[c++] = 0;
		}

	SCORE scoreSum = 0;
	for (unsigned s = 0; s < nSeqs; ++s)
		{
		const WEIGHT w = msa.GetSeqWeight(s);
		SCORE scoreRow = 0;
		for (unsigned c = 0; c < nCols; ++c)
			{
			const ProfPos &Pos = ptrProfile[c];

			if (!msa.IsGap(s, c))
				{
				// Wildcards contribute nothing to either total.
				if (msa.IsWildcard(s, c))
					continue;
				const unsigned uLetter = msa.GetLetter(s, c);
				const SCORE scoreLetter = Pos.m_AAScores[uLetter];
				if (bPerColumn)
					MatchScore[c] += w*scoreLetter;
				scoreRow += scoreLetter;
				continue;
				}

			// Gap column: only the first and last column of a gap run are
			// charged; interior columns add nothing (no extend term).
			const bool bOpens = !(0 != c && msa.IsGap(s, c - 1));
			const bool bCloses = !(nCols - 1 != c && msa.IsGap(s, c + 1));
			if (bOpens)
				scoreRow += Pos.m_scoreGapOpen;
			if (bCloses)
				scoreRow += Pos.m_scoreGapClose;
			}
		scoreSum += w*scoreRow;
		}

	delete[] ptrProfile;
	return scoreSum;
	}
コード例 #3
0
ファイル: mhack.cpp プロジェクト: Unode/ext_apps
// For every sequence whose entry in the global flag array M (indexed by
// sequence id) is set, overwrite its first non-gap character with 'M'.
// Afterwards the flag array is freed and the pointer cleared.
//
// Does nothing unless the alphabet is amino acid and M has been allocated.
void MHackEnd(MSA &msa)
	{
	if (ALPHA_Amino != g_Alpha || 0 == M)
		return;

	const unsigned nSeqs = msa.GetSeqCount();
	const unsigned nCols = msa.GetColCount();
	for (unsigned s = 0; s < nSeqs; ++s)
		{
		const unsigned uId = msa.GetSeqId(s);
		if (!M[uId])
			continue;

		// Skip leading gaps to find the first residue, if there is one.
		unsigned c = 0;
		while (c < nCols && msa.IsGap(s, c))
			++c;
		if (c < nCols)
			msa.SetChar(s, c, 'M');
		}

	delete[] M;
	M = 0;
	}
コード例 #4
0
ファイル: writescorefile.cpp プロジェクト: ggrekhov/ugene
static double GetColScore(const MSA &msa, unsigned uCol)
	{
    MuscleContext *d = getMuscleContext();
    unsigned &g_AlphaSize = d->alpha.g_AlphaSize;
    ALPHA &g_Alpha = d->alpha.g_Alpha;

	const unsigned uSeqCount = msa.GetSeqCount();
	unsigned uPairCount = 0;
	double dSum = 0.0;
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		if (msa.IsGap(uSeq1, uCol))
			continue;
		unsigned uLetter1 = msa.GetLetterEx(uSeq1, uCol);
		if (uLetter1 >= g_AlphaSize)
			continue;
		for (unsigned uSeq2 = uSeq1 + 1; uSeq2 < uSeqCount; ++uSeq2)
			{
			if (msa.IsGap(uSeq2, uCol))
				continue;
			unsigned uLetter2 = msa.GetLetterEx(uSeq2, uCol);
			if (uLetter2 >= g_AlphaSize)
				continue;
			double Score;
			switch (g_Alpha)
				{
			case ALPHA_Amino:
				Score = VTML_SP[uLetter1][uLetter2];
				break;
			case ALPHA_DNA:
			case ALPHA_RNA:
				Score = NUC_SP[uLetter1][uLetter2];
				break;
			default:
				Quit("GetColScore: invalid alpha=%d", g_Alpha);
				}
			dSum += Score;
			++uPairCount;
			}
		}
	if (0 == uPairCount)
		return 0;
	return dSum / uPairCount;
	}
コード例 #5
0
ファイル: scoregaps.cpp プロジェクト: ggrekhov/ugene
// TODO: This could be much faster, no need to look
// at all columns.
static void FindIntersectingGaps(const MSA &msa, unsigned SeqIndex)
	{
    MuscleContext *ctx = getMuscleContext();
    GAPINFO** &g_Gaps = ctx->scoregaps.g_Gaps;
    bool* &g_ColDiff = ctx->scoregaps.g_ColDiff;
	const unsigned ColCount = msa.GetColCount();
	bool InGap = false;
	bool Intersects = false;
	unsigned Start = uInsane;
	for (unsigned Col = 0; Col <= ColCount; ++Col)
		{
		bool Gap = ((Col != ColCount) && msa.IsGap(SeqIndex, Col));
		if (Gap)
			{
			if (!InGap)
				{
				InGap = true;
				Start = Col;
				}
			if (g_ColDiff[Col])
				Intersects = true;
			}
		else if (InGap)
			{
			InGap = false;
			if (Intersects)
				{
				GAPINFO *GI = NewGapInfo();
				GI->Start = Start;
				GI->End = Col - 1;
				GI->Next = g_Gaps[SeqIndex];
				g_Gaps[SeqIndex] = GI;
				}
			Intersects = false;
			}
		}
	}
コード例 #6
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// Sum of substitution scores for one pair of aligned sequences.
//
//   msa1, uSeqIndex1  first sequence (row uSeqIndex1 of msa1)
//   msa2, uSeqIndex2  second sequence (row uSeqIndex2 of msa2)
//
// Both alignments must have the same number of columns, otherwise Quit()
// is called. Scanning is limited to the span between the first and the
// last column in which at least one of the two sequences is not a gap.
// Within that span a column is scored only when both letters, as returned
// by GetLetterEx, are below g_AlphaSize; scores are read from
// *g_ptrScoreMatrix.
//
// Returns the summed score, or 0 for an alignment with no columns.
SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairLetters, different lengths");

	// With zero columns, uColCount - 1 below would wrap around (unsigned)
	// and the scoring loop would run past the end of the data.
	if (0 == uColCount)
		return 0;

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairLetters\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreLetters = 0;

	// First column where the two sequences are not both gaps.
	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	// Last column where the two sequences are not both gaps. A signed
	// index is used so the loop can terminate after reaching column 0.
	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		// Letters at or above the alphabet size are not scored.
		unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);
		if (uLetter1 >= g_AlphaSize)
			continue;
		unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);
		if (uLetter2 >= g_AlphaSize)
			continue;

		SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
		scoreLetters += scoreMatch;
		}
	return scoreLetters;
	}
コード例 #7
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// Sum of gap penalties for one pair of aligned sequences.
//
//   msa1, uSeqIndex1  first sequence (row uSeqIndex1 of msa1)
//   msa2, uSeqIndex2  second sequence (row uSeqIndex2 of msa2)
//
// Both alignments must have the same number of columns, otherwise Quit()
// is called. Columns where both sequences are gaps are ignored. Scanning
// covers the span between the first and last column where at least one
// sequence is not a gap. A gap that opens on the first scanned column is
// charged TermGapScore(true); any other gap opening is charged
// g_scoreGapOpen; every further column of an open gap is charged
// g_scoreGapExtend.
//
// Returns the summed gap score, or 0 for an alignment with no columns.
SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairGaps, different lengths");

	// With zero columns, uColCount - 1 below would wrap around (unsigned)
	// and the scoring loop would run past the end of the data.
	if (0 == uColCount)
		return 0;

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairGaps\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	// First column where the two sequences are not both gaps.
	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	// Last column where the two sequences are not both gaps. A signed
	// index is used so the loop can terminate after reaching column 0.
	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		// Columns that are gaps in both sequences carry no penalty and
		// do not interrupt a gap that is already open.
		if (bGap1 && bGap2)
			continue;

		if (bGap1)
			{
			if (!bGapping1)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 1 col %d\n", uColIndex);
#endif
				if (uColIndex == uColStart)
					scoreGaps += TermGapScore(true);
				else
					scoreGaps += g_scoreGapOpen;
				bGapping1 = true;
				}
			else
				scoreGaps += g_scoreGapExtend;
			continue;
			}

		else if (bGap2)
			{
			if (!bGapping2)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 2 col %d\n", uColIndex);
#endif
				if (uColIndex == uColStart)
					scoreGaps += TermGapScore(true);
				else
					scoreGaps += g_scoreGapOpen;
				bGapping2 = true;
				}
			else
				scoreGaps += g_scoreGapExtend;
			continue;
			}

		// Neither sequence has a gap here: any open gap is now closed.
		bGapping1 = false;
		bGapping2 = false;
		}

	// A gap still open after the last scanned column is a terminal gap:
	// one gap-open charge is replaced by the terminal-gap score.
	// NOTE(review): this subtraction also happens when that gap opened on
	// uColStart and was charged TermGapScore rather than g_scoreGapOpen --
	// confirm that is intended.
	if (bGapping1 || bGapping2)
		{
		scoreGaps -= g_scoreGapOpen;
		scoreGaps += TermGapScore(true);
		}
	return scoreGaps;
	}
コード例 #8
0
ファイル: objscoreda.cpp プロジェクト: Wyss/mauve-py
// Score one pair of aligned sequences: substitution scores plus gap
// penalties.
//
//   msa1, uSeqIndex1  first sequence (row uSeqIndex1 of msa1)
//   msa2, uSeqIndex2  second sequence (row uSeqIndex2 of msa2)
//   ptrLetters        out: receives the substitution-score subtotal
//   ptrGaps           out: receives the gap-penalty subtotal
//
// Both out-pointers are written unconditionally, so they must be valid.
// Both alignments must have the same number of columns, otherwise Quit()
// is called. The function also records the two alignments and sequence
// indexes in g_ptrMSA1, g_ptrMSA2, g_uSeqIndex1 and g_uSeqIndex2.
//
// Returns the total (letters + gaps).
static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps)
	{
	g_ptrMSA1.get() = &msa1;
	g_ptrMSA2.get() = &msa2;
	g_uSeqIndex1.get() = uSeqIndex1;
	g_uSeqIndex2.get() = uSeqIndex2;

	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPair, different lengths");

#if	TRACE
	Log("ScoreSeqPair\n");
	Log("%16.16s  ", msa1.GetSeqName(uSeqIndex1));
	for (unsigned i = 0; i < uColCount; ++i)
		Log("%c", msa1.GetChar(uSeqIndex1, i));
	Log("\n");
	Log("%16.16s  ", msa2.GetSeqName(uSeqIndex2));
	// The second sequence lives in msa2, so its characters are read from there.
	for (unsigned i = 0; i < uColCount; ++i)
		Log("%c", msa2.GetChar(uSeqIndex2, i));
	Log("\n");
#endif

	SCORE scoreTotal = 0;

// Substitution scores
	// While scoring, remember the first and last non-gap column of each
	// sequence; the gap pass uses them to recognise terminal gaps.
	unsigned uFirstLetter1 = uInsane;
	unsigned uFirstLetter2 = uInsane;
	unsigned uLastLetter1 = uInsane;
	unsigned uLastLetter2 = uInsane;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex);
		bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex);

		if (!bGap1)
			{
			if (uInsane == uFirstLetter1)
				uFirstLetter1 = uColIndex;
			uLastLetter1 = uColIndex;
			}
		if (!bGap2)
			{
			if (uInsane == uFirstLetter2)
				uFirstLetter2 = uColIndex;
			uLastLetter2 = uColIndex;
			}

		// Only columns with a real letter on both sides are scored.
		if (bGap1 || bGap2 || bWildcard1 || bWildcard2)
			continue;

		unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex);
		unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex);

		SCORE scoreMatch = (*g_ptrScoreMatrix.get())[uLetter1][uLetter2];
		scoreTotal += scoreMatch;
#if	TRACE
		Log("%c <-> %c = %7.1f  %10.1f\n",
		  msa1.GetChar(uSeqIndex1, uColIndex),
		  msa2.GetChar(uSeqIndex2, uColIndex),
		  scoreMatch,
		  scoreTotal);
#endif
		}
	
	*ptrLetters = scoreTotal;

// Gap penalties
	// uGapLength and uGapStartCol describe the gap run currently open in
	// either sequence; they are shared between the two sequences.
	unsigned uGapLength = uInsane;
	unsigned uGapStartCol = uInsane;
	bool bGapping1 = false;
	bool bGapping2 = false;

	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		// Columns that are gaps in both sequences are ignored entirely.
		if (bGap1 && bGap2)
			continue;

		if (bGapping1)
			{
			if (bGap1)
				++uGapLength;
			else
				{
				// The gap in sequence 1 ends here. It is treated as terminal
				// when it began on sequence 2's first letter or ended right
				// after sequence 2's last letter.
				bGapping1 = false;
				bool bNTerm = (uFirstLetter2 == uGapStartCol);
				bool bCTerm = (uLastLetter2 + 1 == uColIndex);
				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
				scoreTotal += scoreGap;
#if	TRACE
				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
				Log("GAP         %7.1f  %10.1f\n",
				  scoreGap,
				  scoreTotal);
#endif
				}
			// NOTE(review): this continue also skips the sequence-2 checks
			// for the column that closes a sequence-1 gap -- confirm intended.
			continue;
			}
		else
			{
			if (bGap1)
				{
				uGapStartCol = uColIndex;
				bGapping1 = true;
				uGapLength = 1;
				continue;
				}
			}

		if (bGapping2)
			{
			if (bGap2)
				++uGapLength;
			else
				{
				// Mirror of the sequence-1 case, using sequence 1's letter span.
				bGapping2 = false;
				bool bNTerm = (uFirstLetter1 == uGapStartCol);
				bool bCTerm = (uLastLetter1 + 1 == uColIndex);
				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
				scoreTotal += scoreGap;
#if	TRACE
				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
				Log("GAP         %7.1f  %10.1f\n",
				  scoreGap,
				  scoreTotal);
#endif
				}
			}
		else
			{
			if (bGap2)
				{
				uGapStartCol = uColIndex;
				bGapping2 = true;
				uGapLength = 1;
				}
			}
		}

	// A gap still open after the last column is charged as a terminal gap.
	if (bGapping1 || bGapping2)
		{
		SCORE scoreGap = GapPenalty(uGapLength, true);
		scoreTotal += scoreGap;
#if	TRACE
		LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true);
		Log("GAP         %7.1f  %10.1f\n",
		  scoreGap,
		  scoreTotal);
#endif
		}
	*ptrGaps = scoreTotal - *ptrLetters;
	return scoreTotal;
	}
コード例 #9
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
/* a version of ScoreSeqPairGaps that computes a per-residue score */
//
//   msa1, uSeqIndex1  first sequence (row uSeqIndex1 of msa1)
//   msa2, uSeqIndex2  second sequence (row uSeqIndex2 of msa2)
//   MatchScore        optional (may be NULL) per-column array; the columns
//                     of each gap receive that gap's penalty divided
//                     evenly among them. Other columns are not written.
//
// Both alignments must have the same number of columns, otherwise Quit()
// is called. Columns where both sequences are gaps are ignored. A gap
// that opens on the first scanned column is charged TermGapScore(true);
// any other opening is charged g_scoreGapOpen; each further column of an
// open gap is charged g_scoreGapExtend.
//
// Returns the summed gap score, or 0 for an alignment with no columns.
SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2, SCORE MatchScore[] )
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairGaps, different lengths");

	// With zero columns, uColCount - 1 below would wrap around (unsigned)
	// and the scoring loop would run past the end of the data.
	if (0 == uColCount)
		return 0;

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairGaps\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	// First column where the two sequences are not both gaps.
	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	// Last column where the two sequences are not both gaps. A signed
	// index is used so the loop can terminate after reaching column 0.
	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	// gap_left_col is the column where the most recently opened gap began;
	// cur_gap_score is the penalty accumulated since the last reset below.
	unsigned gap_left_col = 0;
	SCORE cur_gap_score = 0;
	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		// Columns that are gaps in both sequences carry no penalty and
		// do not interrupt a gap that is already open.
		if (bGap1 && bGap2)
			continue;

		if (bGap1)
			{
			if (!bGapping1)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 1 col %d\n", uColIndex);
#endif
				gap_left_col = uColIndex;
				if (uColIndex == uColStart)
					{
					scoreGaps += TermGapScore(true);
					cur_gap_score += TermGapScore(true);
				}else{
					scoreGaps += g_scoreGapOpen.get();
					cur_gap_score += g_scoreGapOpen.get();
					}
				bGapping1 = true;
				}
			else
				{
				scoreGaps += g_scoreGapExtend.get();
				cur_gap_score += g_scoreGapExtend.get();
				}
			continue;
			}

		else if (bGap2)
			{
			if (!bGapping2)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 2 col %d\n", uColIndex);
#endif
				gap_left_col = uColIndex;
				if (uColIndex == uColStart)
					{
					scoreGaps += TermGapScore(true);
					cur_gap_score += TermGapScore(true);
				}else{
					scoreGaps += g_scoreGapOpen.get();
					cur_gap_score += g_scoreGapOpen.get();
					}
				bGapping2 = true;
				}
			else
				{
				scoreGaps += g_scoreGapExtend.get();
				cur_gap_score += g_scoreGapExtend.get();
				}
			continue;
			}

		// Neither sequence has a gap here. If a gap was open it ends at the
		// previous column; uColIndex is past gap_left_col, so the divisor
		// below is at least 1.
		if( MatchScore != NULL && (bGapping1 || bGapping2) )
		{
			// spread the total gap penalty evenly across all columns
			SCORE per_site_penalty = cur_gap_score / (uColIndex-gap_left_col);
			for( unsigned uGapIndex = gap_left_col; uGapIndex < uColIndex; ++uGapIndex )
				{
				MatchScore[uGapIndex] = per_site_penalty;
				}
			gap_left_col = uInsane;
			cur_gap_score = 0;
		}
		bGapping1 = false;
		bGapping2 = false;
		}

	// A gap still open after the last scanned column is a terminal gap:
	// one gap-open charge is replaced by the terminal-gap score.
	if (bGapping1 || bGapping2)
		{
		scoreGaps -= g_scoreGapOpen.get();
		scoreGaps += TermGapScore(true);
		cur_gap_score -= g_scoreGapOpen.get();
		cur_gap_score += TermGapScore(true);

		if( MatchScore != NULL )
			{
			// spread the total gap penalty evenly across all columns
			// NOTE(review): the spread runs to uColCount, not uColEnd + 1, so
			// trailing columns that are gaps in both sequences are included --
			// confirm intended.
			SCORE per_site_penalty = cur_gap_score / (uColCount-gap_left_col);
			for( unsigned uGapIndex = gap_left_col; uGapIndex < uColCount; ++uGapIndex )
				{
				MatchScore[uGapIndex] = per_site_penalty;
				}
			}
		}
	return scoreGaps;
	}