コード例 #1
0
ファイル: msa2.cpp プロジェクト: ggrekhov/ugene
// Return true if the given column has no gaps and all
// its residues are in the same biochemical group.
bool MSAColIsConservative(const MSA &msa, unsigned uColIndex)
	{
	extern unsigned ResidueGroup[];

	const unsigned uSeqCount = msa.GetColCount();
	if (0 == uSeqCount)
		Quit("MSAColIsConservative: empty alignment");

	if (msa.IsGap(0, uColIndex))
		return false;

	unsigned uLetter = msa.GetLetterEx(0, uColIndex);
    // cppcheck-suppress uninitvar
	const unsigned uGroup = ResidueGroup[uLetter];

	for (unsigned uSeqIndex = 1; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		if (msa.IsGap(uSeqIndex, uColIndex))
			return false;
		uLetter = msa.GetLetter(uSeqIndex, uColIndex);
		if (ResidueGroup[uLetter] != uGroup)
			return false;
		}
	return true;
	}
コード例 #2
0
ファイル: writescorefile.cpp プロジェクト: ggrekhov/ugene
// Mean pairwise substitution score of column uCol.
//
// Every unordered pair of sequences whose entries in the column are both
// non-gap and both have a letter code below g_AlphaSize contributes one
// score, taken from VTML_SP for the amino alphabet or NUC_SP for DNA/RNA.
// Returns the sum divided by the number of contributing pairs, or 0 when
// no pair contributes. Quit() is called if a pair has to be scored while
// the alphabet is none of the three handled here.
static double GetColScore(const MSA &msa, unsigned uCol)
	{
	MuscleContext *d = getMuscleContext();
	unsigned &g_AlphaSize = d->alpha.g_AlphaSize;
	ALPHA &g_Alpha = d->alpha.g_Alpha;

	const unsigned uSeqCount = msa.GetSeqCount();
	double dTotal = 0.0;
	unsigned uScoredPairs = 0;

	for (unsigned uRowA = 0; uRowA < uSeqCount; ++uRowA)
		{
		if (msa.IsGap(uRowA, uCol))
			continue;
		const unsigned uLetterA = msa.GetLetterEx(uRowA, uCol);
		if (uLetterA >= g_AlphaSize)
			continue;

		// Pair the residue with every later sequence in the column.
		for (unsigned uRowB = uRowA + 1; uRowB < uSeqCount; ++uRowB)
			{
			if (msa.IsGap(uRowB, uCol))
				continue;
			const unsigned uLetterB = msa.GetLetterEx(uRowB, uCol);
			if (uLetterB >= g_AlphaSize)
				continue;

			// The alphabet is checked per pair, so an invalid alphabet is
			// only reported when there is actually something to score.
			double dPair;
			if (ALPHA_Amino == g_Alpha)
				dPair = VTML_SP[uLetterA][uLetterB];
			else if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha)
				dPair = NUC_SP[uLetterA][uLetterB];
			else
				Quit("GetColScore: invalid alpha=%d", g_Alpha);

			dTotal += dPair;
			++uScoredPairs;
			}
		}

	if (0 == uScoredPairs)
		return 0;
	return dTotal / uScoredPairs;
	}
コード例 #3
0
ファイル: scoredist.cpp プロジェクト: Unode/ext_apps
// Similarity score of two sequences of the alignment.
//
// Sums BLOSUM62[a][b] over every column in which both sequences have a
// letter code below 20; all other columns are skipped. The number of
// columns that were scored is written to *ptrLength and the sum is
// returned.
static double Sigma(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2,
  unsigned *ptrLength)
	{
	const unsigned ColCount = msa.GetColCount();
	unsigned ScoredCols = 0;
	double Total = 0;

	for (unsigned Col = 0; Col != ColCount; ++Col)
		{
		const unsigned LetterA = msa.GetLetterEx(SeqIndex1, Col);
		const unsigned LetterB = msa.GetLetterEx(SeqIndex2, Col);
		if (LetterA < 20 && LetterB < 20)
			{
			Total += BLOSUM62[LetterA][LetterB];
			++ScoredCols;
			}
		}

	*ptrLength = ScoredCols;
	return Total;
	}
コード例 #4
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// Letter (substitution) score of one sequence from msa1 against one
// sequence from msa2.
//
//   msa1, uSeqIndex1  first alignment and the sequence to take from it.
//   msa2, uSeqIndex2  second alignment and the sequence to take from it.
//
// Both alignments must have the same number of columns; otherwise Quit()
// is called. Columns before the first and after the last column in which
// at least one of the two sequences is not a gap are skipped. Within the
// remaining range, every column where both letter codes are below
// g_AlphaSize adds (*g_ptrScoreMatrix)[letter1][letter2] to the result.
// Returns 0 for alignments with no columns.
SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairLetters, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairLetters\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	// Nothing to score. Without this guard uColEnd below would wrap around
	// to the largest unsigned value and the scoring loop would index far
	// past the end of the alignment.
	if (0 == uColCount)
		return 0;

	SCORE scoreLetters = 0;

	// First column in which at least one of the two sequences has a letter.
	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	// Last column in which at least one of the two sequences has a letter.
	// Counting down on an unsigned index avoids the signed cast, which
	// would misbehave for column counts above INT_MAX.
	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (unsigned uColIndex = uColCount; uColIndex-- > 0; )
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = uColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		// Letter codes at or above the alphabet size are not scored.
		unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);
		if (uLetter1 >= g_AlphaSize)
			continue;
		unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);
		if (uLetter2 >= g_AlphaSize)
			continue;

		SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
		scoreLetters += scoreMatch;
		}
	return scoreLetters;
	}
コード例 #5
0
ファイル: diffobjscore.cpp プロジェクト: ggrekhov/ugene
// Weighted sum-of-pairs letter score of one alignment column.
//
// The value computed is the sum over sequence pairs i < j of
// w_i * w_j * Mx[letter_i][letter_j], restricted to sequences whose letter
// code in the column is below g_AlphaSize. Instead of visiting every pair,
// the weights are accumulated per letter and the pair sum is rebuilt from
// those per-letter totals; the BRUTE_LETTERS build computes the direct
// pairwise sum as well and asserts that both agree.
//
// Returns 0 when the sequence weights sum to zero or less.
static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex)
	{
	MuscleContext *ctx = getMuscleContext();
	SCOREMATRIX &Mx = *ctx->params.g_ptrScoreMatrix;
	unsigned &g_AlphaSize = ctx->alpha.g_AlphaSize;

	const unsigned uSeqCount = msa.GetSeqCount();

#if	BRUTE_LETTERS
	// Reference result: explicit loop over all pairs of sequences.
	SCORE BruteScore = 0;
	for (unsigned uRowA = 0; uRowA < uSeqCount; ++uRowA)
		{
		const unsigned uLetterA = msa.GetLetterEx(uRowA, uColIndex);
		if (uLetterA >= g_AlphaSize)
			continue;
		const WEIGHT wA = msa.GetSeqWeight(uRowA);
		for (unsigned uRowB = uRowA + 1; uRowB < uSeqCount; ++uRowB)
			{
			const unsigned uLetterB = msa.GetLetterEx(uRowB, uColIndex);
			if (uLetterB >= g_AlphaSize)
				continue;
			const WEIGHT wB = msa.GetSeqWeight(uRowB);
			BruteScore += wA*wB*Mx[uLetterA][uLetterB];
			}
		}
#endif

	// Total weight of all sequences; only used to bail out early.
	double dWeightSum = 0;
	for (unsigned uRow = 0; uRow < uSeqCount; ++uRow)
		{
		const WEIGHT w = msa.GetSeqWeight(uRow);
		dWeightSum += w;
		}
	if (dWeightSum <= 0)
		return 0;

	// Accumulate the weight carried by each letter. At the same time
	// subtract each sequence's pairing with itself, which the per-letter
	// products below would otherwise include.
	FCOUNT Freqs[20];
	memset(Freqs, 0, sizeof(Freqs));
	SCORE Score = 0;
	for (unsigned uRow = 0; uRow < uSeqCount; ++uRow)
		{
		const unsigned uLetter = msa.GetLetterEx(uRow, uColIndex);
		if (uLetter < g_AlphaSize)
			{
			const WEIGHT w = msa.GetSeqWeight(uRow);
			Freqs[uLetter] += w;
			Score -= w*w*Mx[uLetter][uLetter];
			}
		}

	// Add the products of per-letter weights. Only the upper triangle of
	// Mx is read and doubled, so the matrix is treated as symmetric.
	for (unsigned uLetterA = 0; uLetterA < g_AlphaSize; ++uLetterA)
		{
		const FCOUNT fA = Freqs[uLetterA];
		Score += fA*fA*Mx[uLetterA][uLetterA];
		for (unsigned uLetterB = uLetterA + 1; uLetterB < g_AlphaSize; ++uLetterB)
			{
			const FCOUNT fB = Freqs[uLetterB];
			Score += 2*fA*fB*Mx[uLetterA][uLetterB];
			}
		}

	// Each unordered pair has been counted twice at this point.
	Score /= 2;
#if	BRUTE_LETTERS
	assert(BTEq(BruteScore, Score));
#endif
	return Score;
	}
コード例 #6
0
ファイル: aln.cpp プロジェクト: ggrekhov/ugene
// Conservation symbol for one column, as printed on the consensus line of
// a CLUSTAL-format alignment.
//
//   a          alignment to inspect.
//   uColIndex  index of the column.
//
// Returns
//   '*'  if every sequence has the same letter code in the column,
//   ':'  if all letters fall inside one of the 'strong' groups below,
//   '.'  if all letters fall inside one of the 'weaker' groups below,
//   ' '  otherwise, and always for non-amino alphabets unless '*' applies.
static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex)
	{
	ALPHA &g_Alpha = getMuscleContext()->alpha.g_Alpha;
	const unsigned uSeqCount = a.GetSeqCount();

	// BitMap has bit n set when letter code n occurs in the column;
	// Count is the number of distinct letter codes seen.
	unsigned BitMap = 0;
	unsigned Count = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uLetter = a.GetLetterEx(uSeqIndex, uColIndex);
		assert(uLetter < 32);
		// Unsigned literal: shifting a signed 1 into bit 31 is not
		// well defined, and the assert above allows uLetter == 31.
		unsigned Bit = (1u << uLetter);
		if (!(BitMap & Bit))
			++Count;
		BitMap |= Bit;
		}

//	'*' indicates positions which have a single, fully conserved residue
	if (1 == Count)
		return '*';

	if (ALPHA_Amino != g_Alpha)
		return ' ';

// B(x) is the bit for amino acid x. S(w) / W(w) return ':' / '.' when the
// column is non-empty and contains no letter outside the group mask w.
#define B(a)	(1 << AX_##a)
#define S2(a, b)		S(B(a) | B(b))
#define S3(a, b, c)		S(B(a) | B(b) | B(c))
#define S4(a, b, c, d)	S(B(a) | B(b) | B(c) | B(d))
#define S(w)	if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return ':';

#define W3(a, b, c)				W(B(a) | B(b) | B(c))
#define W4(a, b, c, d)			W(B(a) | B(b) | B(c) | B(d))
#define W5(a, b, c, d, e)		W(B(a) | B(b) | B(c) | B(d) | B(e))
#define W6(a, b, c, d, e, f)	W(B(a) | B(b) | B(c) | B(d) | B(e) | B(f))
#define W(w)	if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return '.';

//	':' indicates that one of the following 'strong'
// groups is fully conserved
//                 STA
//                 NEQK
//                 NHQK
//                 NDEQ
//                 QHRK
//                 MILV
//                 MILF
//                 HY
//                 FYW
//
	S3(S, T, A)
	S4(N, E, Q, K)
	S4(N, H, Q, K)
	S4(N, D, E, Q)
	S4(Q, H, R, K)
	S4(M, I, L, V)
	S4(M, I, L, F)
	S2(H, Y)
	S3(F, Y, W)

//	'.' indicates that one of the following 'weaker' 
// groups is fully conserved
//                 CSA
//                 ATV
//                 SAG
//                 STNK
//                 STPA
//                 SGND
//                 SNDEQK
//                 NDEQHK
//                 NEQHRK
//                 FVLIM
//                 HFY
	W3(C, S, A)
	W3(A, T, V)
	W3(S, A, G)
	W4(S, T, N, K)
	W4(S, T, P, A)
	W4(S, G, N, D)
	W6(S, N, D, E, Q, K)
	W6(N, D, E, Q, H, K)
	W6(N, E, Q, H, R, K)
	W5(F, V, L, I, M)
	W3(H, F, Y)

	return ' ';
	}