示例#1
0
char ConsensusChar(const ProfPos &PP)
	{
	unsigned uMostCommonLetter = 0;
	FCOUNT fcMostCommon = PP.m_fcCounts[0];
	bool bMoreThanOneLetter = false;
	bool bAnyLetter = false;
	for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
		{
		const FCOUNT fc = PP.m_fcCounts[uLetter];
		if (fc > 0)
			{
			if (bAnyLetter)
				bMoreThanOneLetter = true;
			bAnyLetter = true;
			}
		if (fc > fcMostCommon)
			{
			uMostCommonLetter = uLetter;
			fcMostCommon = fc;
			}
		}
	if (!bAnyLetter)
		return '-';
	char c = LetterToChar(uMostCommonLetter);
	if (bMoreThanOneLetter)
		return UnalignChar(c);
	return c;
	}
示例#2
0
static char *TupleToStr(int t)
	{
	static char s[K];

	for (int i = 0; i < K; ++i)
		{
		unsigned Letter = (t/(pow4(i)))%4;
		assert(Letter >= 0 && Letter < 4);
		s[K-i-1] = LetterToChar(Letter);
		}

	return s;
	}
示例#3
0
void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
  unsigned uLengthY, DiagList &DL)
	{
    MuscleContext *ctx = getMuscleContext();
    ALPHA &g_Alpha = ctx->alpha.g_Alpha;
    unsigned* TuplePos = ctx->finddiagsn.TuplePos;
    unsigned &g_uMinDiagLength = ctx->params.g_uMinDiagLength;

	if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
		Quit("FindDiagsNuc: requires nucleo alphabet");

	DL.Clear();

// 16 is arbitrary slop, no principled reason for this.
	if (uLengthX < K + 16 || uLengthY < K + 16)
		return;

// Set A to shorter profile, B to longer
	const ProfPos *PA;
	const ProfPos *PB;
	unsigned uLengthA;
	unsigned uLengthB;
	bool bSwap;
	if (uLengthX < uLengthY)
		{
		bSwap = false;
		PA = PX;
		PB = PY;
		uLengthA = uLengthX;
		uLengthB = uLengthY;
		}
	else
		{
		bSwap = true;
		PA = PY;
		PB = PX;
		uLengthA = uLengthY;
		uLengthB = uLengthX;
		}

#if	TRACE
	Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);
#endif

// Build tuple map for the longer profile, B
	if (uLengthB < K)
		Quit("FindDiags: profile too short");

	memset(TuplePos, EMPTY, sizeof(TuplePos));

	for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)
		{
		const unsigned uTuple = GetTuple(PB, uPos);
		if (EMPTY == uTuple)
			continue;
		TuplePos[uTuple] = uPos;
		}

// Find matches
	for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)
		{
		const unsigned uTuple = GetTuple(PA, uPosA);
		if (EMPTY == uTuple)
			continue;
		const unsigned uPosB = TuplePos[uTuple];
		if (EMPTY == uPosB)
			continue;

	// This tuple is found in both profiles
		unsigned uStartPosA = uPosA;
		unsigned uStartPosB = uPosB;

	// Try to extend the match forwards
		unsigned uEndPosA = uPosA + K - 1;
		unsigned uEndPosB = uPosB + K - 1;
		for (;;)
			{
			if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
				break;
			const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
				break;
			const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
			if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
				break;
			if (uAAGroupA != uAAGroupB)
				break;
			++uEndPosA;
			++uEndPosB;
			}
		uPosA = uEndPosA;

#if	TRACE
		{
		Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);
		for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
			Log("%c", LetterToChar(PA[n].m_uResidueGroup));
		Log("\n");
		Log("       B %4u-%4u   ", uStartPosB, uEndPosB);
		for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
			Log("%c", LetterToChar(PB[n].m_uResidueGroup));
		Log("\n");
		}
#endif

		const unsigned uLength = uEndPosA - uStartPosA + 1;
		assert(uEndPosB - uStartPosB + 1 == uLength);

		if (uLength >= g_uMinDiagLength)
			{
			if (bSwap)
				DL.Add(uStartPosB, uStartPosA, uLength);
			else
				DL.Add(uStartPosA, uStartPosB, uLength);
			}
		}
	}