コード例 #1
0
ファイル: aligngivenpathsw.cpp プロジェクト: Wyss/mauve-py
void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
  MSA &msaCombined)
	{
	msaCombined.Clear();

#if	TRACE
	Log("AlignTwoMSAsGivenPathSW\n");
	Log("Template A:\n");
	msaA.LogMe();
	Log("Template B:\n");
	msaB.LogMe();
#endif

	const unsigned uColCountA = msaA.GetColCount();
	const unsigned uColCountB = msaB.GetColCount();

	const unsigned uSeqCountA = msaA.GetSeqCount();
	const unsigned uSeqCountB = msaB.GetSeqCount();

	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);

// Copy sequence names into combined MSA
	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
		{
		msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
		msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
		}

	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
		{
		msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
		msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
		}

	unsigned uColIndexA = 0;
	unsigned uColIndexB = 0;
	unsigned uColIndexCombined = 0;
	const unsigned uEdgeCount = Path.GetEdgeCount();
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
#if	TRACE
		Log("\nEdge %u %c%u.%u\n",
		  uEdgeIndex,
		  Edge.cType,
		  Edge.uPrefixLengthA,
		  Edge.uPrefixLengthB);
#endif
		const char cType = Edge.cType;
		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
		unsigned uColCountA = 0;
		if (uPrefixLengthA > 0)
			{
			const unsigned uNodeIndexA = uPrefixLengthA - 1;
			const unsigned uTplColIndexA = uNodeIndexA;
			if (uTplColIndexA > uColIndexA)
				uColCountA = uTplColIndexA - uColIndexA;
			}

		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
		unsigned uColCountB = 0;
		if (uPrefixLengthB > 0)
			{
			const unsigned uNodeIndexB = uPrefixLengthB - 1;
			const unsigned uTplColIndexB = uNodeIndexB;
			if (uTplColIndexB > uColIndexB)
				uColCountB = uTplColIndexB - uColIndexB;
			}

		AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);

		switch (cType)
			{
		case 'M':
			{
			assert(uPrefixLengthA > 0);
			assert(uPrefixLengthB > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexA == uColA);
			assert(uColIndexB == uColB);
			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
			  msaCombined, uColIndexCombined);
			break;
			}
		case 'D':
			{
			assert(uPrefixLengthA > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			assert(uColIndexA == uColA);
			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		case 'I':
			{
			assert(uPrefixLengthB > 0);
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexB == uColB);
			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		default:
			assert(false);
			}
		}
	unsigned uInsertColCountA = uColCountA - uColIndexA;
	unsigned uInsertColCountB = uColCountB - uColIndexB;

	AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
	  uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
	}
コード例 #2
0
ファイル: sptest.cpp プロジェクト: bigmuscle/bigmuscle
void SPTest()
	{
	SetPPScore(PPSCORE_SV);

	SetListFileName("c:\\tmp\\muscle.log", false);

	TextFile file1("c:\\tmp\\msa1.afa");
	TextFile file2("c:\\tmp\\msa2.afa");

	MSA msa1;
	MSA msa2;

	msa1.FromFile(file1);
	msa2.FromFile(file2);

	Log("msa1=\n");
	msa1.LogMe();
	Log("msa2=\n");
	msa2.LogMe();

	const unsigned uColCount = msa1.GetColCount();
	if (msa2.GetColCount() != uColCount)
		Quit("Different lengths");

	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();
	const unsigned uSeqCount = uSeqCount1 + uSeqCount2;

	MSA::SetIdCount(uSeqCount);

	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		msa1.SetSeqWeight(uSeqIndex1, 1.0);
		msa1.SetSeqId(uSeqIndex1, uSeqIndex1);
		}

	for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
		{
		msa2.SetSeqWeight(uSeqIndex2, 1.0);
		msa2.SetSeqId(uSeqIndex2, uSeqCount1 + uSeqIndex2);
		}

	MSA alnA;
	MSA alnB;

	char strPathA[1024];
	char strPathB[1024];
	MakePath(uColCount, INDELS, strPathA);
	MakePath(uColCount, INDELS, strPathB);

	PWPath PathA;
	PWPath PathB;
	PathA.FromStr(strPathA);
	PathB.FromStr(strPathB);

	Log("PathA=\n");
	PathA.LogMe();
	Log("PathB=\n");
	PathB.LogMe();

	AlignTwoMSAsGivenPath(PathA, msa1, msa2, alnA);
	AlignTwoMSAsGivenPath(PathB, msa1, msa2, alnB);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		alnA.SetSeqWeight(uSeqIndex, 1.0);
		alnB.SetSeqWeight(uSeqIndex, 1.0);
		}

	unsigned Seqs1[1024];
	unsigned Seqs2[1024];

	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		Seqs1[uSeqIndex1] = uSeqIndex1;

	for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
		Seqs2[uSeqIndex2] = uSeqCount1 + uSeqIndex2;

	MSA msaA1;
	MSA msaA2;
	MSA msaB1;
	MSA msaB2;
	MSAFromSeqSubset(alnA, Seqs1, uSeqCount1, msaA1);
	MSAFromSeqSubset(alnB, Seqs1, uSeqCount1, msaB1);
	MSAFromSeqSubset(alnA, Seqs2, uSeqCount2, msaA2);
	MSAFromSeqSubset(alnB, Seqs2, uSeqCount2, msaB2);

	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		msaA1.SetSeqWeight(uSeqIndex1, 1.0);
		msaB1.SetSeqWeight(uSeqIndex1, 1.0);
		}

	for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
		{
		msaA2.SetSeqWeight(uSeqIndex2, 1.0);
		msaB2.SetSeqWeight(uSeqIndex2, 1.0);
		}

	Log("msaA1=\n");
	msaA1.LogMe();

	Log("msaB1=\n");
	msaB1.LogMe();

	Log("msaA2=\n");
	msaA2.LogMe();

	Log("msaB2=\n");
	msaB2.LogMe();

	Log("alnA=\n");
	alnA.LogMe();

	Log("AlnB=\n");
	alnB.LogMe();

	Log("\nSPA\n---\n");
	SCORE SPA = ObjScoreSP(alnA);
	Log("\nSPB\n---\n");
	SCORE SPB = ObjScoreSP(alnB);

	Log("\nXPA\n---\n");
	SCORE XPA = ObjScoreXP(msaA1, msaA2);
	Log("\nXPB\n---\n");
	SCORE XPB = ObjScoreXP(msaB1, msaB2);

	Log("SPA=%.4g SPB=%.4g Diff=%.4g\n", SPA, SPB, SPA - SPB);
	Log("XPA=%.4g XPB=%.4g Diff=%.4g\n", XPA, XPB, XPA - XPB);
	}
コード例 #3
0
ファイル: scoregaps.cpp プロジェクト: ggrekhov/ugene
SCORE ScoreGaps(const MSA &msa, const unsigned DiffCols[], unsigned DiffColCount)
	{
    MuscleContext *ctx = getMuscleContext();
    unsigned &g_ColCount = ctx->scoregaps.g_ColCount;
    unsigned &g_MaxSeqCount = ctx->scoregaps.g_MaxSeqCount;
    unsigned &g_MaxColCount = ctx->scoregaps.g_MaxColCount;
    GAPINFO** &g_Gaps = ctx->scoregaps.g_Gaps;
    bool* &g_ColDiff = ctx->scoregaps.g_ColDiff;
    
#if	TRACE
	{
	Log("ScoreGaps\n");
	Log("DiffCols ");
	for (unsigned i = 0; i < DiffColCount; ++i)
		Log(" %u", DiffCols[i]);
	Log("\n");
	Log("msa=\n");
	msa.LogMe();
	Log("\n");
	}
#endif
	const unsigned SeqCount = msa.GetSeqCount();
	const unsigned ColCount = msa.GetColCount();
	g_ColCount = ColCount;

	if (SeqCount > g_MaxSeqCount)
		{
		delete[] g_Gaps;
		g_MaxSeqCount = SeqCount + 256;
		g_Gaps = new GAPINFO *[g_MaxSeqCount];
		}
	memset(g_Gaps, 0, SeqCount*sizeof(GAPINFO *));

	if (ColCount > g_MaxColCount)
		{
		delete[] g_ColDiff;
		g_MaxColCount = ColCount + 256;
		g_ColDiff = new bool[g_MaxColCount];
		}

	memset(g_ColDiff, 0, g_ColCount*sizeof(bool));
	for (unsigned i = 0; i < DiffColCount; ++i)
		{
		unsigned Col = DiffCols[i];
		assert(Col < ColCount);
		g_ColDiff[Col] = true;
		}

	for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
		FindIntersectingGaps(msa, SeqIndex);

#if	TRACE
	{
	Log("\n");
	Log("Intersecting gaps:\n");
	Log("      ");
	for (unsigned Col = 0; Col < ColCount; ++Col)
		Log("%c", g_ColDiff[Col] ? '*' : ' ');
	Log("\n");
	Log("      ");
	for (unsigned Col = 0; Col < ColCount; ++Col)
		Log("%d", Col%10);
	Log("\n");
	for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
		{
		Log("%3d:  ", Seq);
		for (unsigned Col = 0; Col < ColCount; ++Col)
			Log("%c", msa.GetChar(Seq, Col));
		Log("  :: ");
		for (GAPINFO *GI = g_Gaps[Seq]; GI; GI = GI->Next)
			Log(" (%d,%d)", GI->Start, GI->End);
		Log("  >%s\n", msa.GetSeqName(Seq));
		}
	Log("\n");
	}
#endif

	SCORE Score = 0;
	for (unsigned Seq1 = 0; Seq1 < SeqCount; ++Seq1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(Seq1);
		for (unsigned Seq2 = Seq1 + 1; Seq2 < SeqCount; ++Seq2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(Seq2);
//			const SCORE Pair = ScorePair(Seq1, Seq2);
			const SCORE Pair = ScoreSeqPairGaps(msa, Seq1, msa, Seq2);
			Score += w1*w2*Pair;
#if	TRACE
			Log("Seq1=%u Seq2=%u ScorePair=%.4g w1=%.4g w2=%.4g Sum=%.4g\n",
			  Seq1, Seq2, Pair, w1, w2, Score);
#endif
			}
		}

	return Score;
	}
コード例 #4
0
ファイル: refinevert.cpp プロジェクト: bigmuscle/bigmuscle
// Return true if any changes made
bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters)
	{
	bool bAnyChanges = false;

	const unsigned uColCountIn = msaIn.GetColCount();
	const unsigned uSeqCountIn = msaIn.GetSeqCount();

	if (uColCountIn < 3 || uSeqCountIn < 3)
		return false;

	unsigned *AnchorCols = new unsigned[uColCountIn];
	unsigned uAnchorColCount;
	SetMSAWeightsMuscle(msaIn);
	FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount);

	const unsigned uRangeCount = uAnchorColCount + 1;
	Range *Ranges = new Range[uRangeCount];

#if	TRACE
	Log("%u ranges\n", uRangeCount);
#endif

	ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
	ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);

#if	TRACE
	{
	Log("Anchor cols: ");
	for (unsigned i = 0; i < uAnchorColCount; ++i)
		Log(" %u", AnchorCols[i]);
	Log("\n");

	Log("Ranges:\n");
	for (unsigned i = 0; i < uRangeCount; ++i)
		Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
	}
#endif

	delete[] AnchorCols;

	MSA msaOut;
	msaOut.SetSize(uSeqCountIn, 0);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uSeqIndex);
		unsigned uId = msaIn.GetSeqId(uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);
		msaOut.SetSeqId(uSeqIndex, uId);
		}

	for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
		{
		MSA msaRange;

		const Range &r = Ranges[uRangeIndex];

		const unsigned uFromColIndex = r.m_uBestColLeft;
		const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;

		if (0 == uRangeColCount)
			continue;
		else if (1 == uRangeColCount)
			{
			MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
			MSAAppend(msaOut, msaRange);
			continue;
			}
		MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange);

#if	TRACE
		Log("\n-------------\n");
		Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
		Log("Before:\n");
		msaRange.LogMe();
#endif

		bool bLockLeft = (0 != uRangeIndex);
		bool bLockRight = (uRangeCount - 1 != uRangeIndex);
		bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight);
		bAnyChanges = (bAnyChanges || bAnyChangesThisBlock);

#if	TRACE
		Log("After:\n");
		msaRange.LogMe();
#endif

		MSAAppend(msaOut, msaRange);

#if	TRACE
		Log("msaOut after Cat:\n");
		msaOut.LogMe();
#endif
		}

#if	DEBUG
// Sanity check
	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
#endif

	delete[] Ranges;
	if (bAnyChanges)
		msaIn.Copy(msaOut);
	return bAnyChanges;
	}
コード例 #5
0
ファイル: objscore2.cpp プロジェクト: Unode/ext_apps
SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairLetters, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairLetters\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreLetters = 0;
	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);
		if (uLetter1 >= g_AlphaSize)
			continue;
		unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);
		if (uLetter2 >= g_AlphaSize)
			continue;

		SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
		scoreLetters += scoreMatch;
		}
	return scoreLetters;
	}
コード例 #6
0
ファイル: refinesubfams.cpp プロジェクト: Unode/ext_apps
static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
  unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)
	{
	const unsigned uNodeCount = tree.GetNodeCount();

	bool *Ready = new bool[uNodeCount];
	MSA **MSAs = new MSA *[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		Ready[uNodeIndex] = false;
		MSAs[uNodeIndex] = 0;
		}

	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uNodeIndex = Subfams[uSubfamIndex];
		Ready[uNodeIndex] = true;
		MSA *ptrMSA = new MSA;
	// TODO: Wasteful copy, needs re-design
		ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);
		MSAs[uNodeIndex] = ptrMSA;
		}

	for (unsigned uNodeIndex = tree.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNodeIndex;
	  uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))
		{
		if (tree.IsLeaf(uNodeIndex))
			continue;

		unsigned uRight = tree.GetRight(uNodeIndex);
		unsigned uLeft = tree.GetLeft(uNodeIndex);
		if (!Ready[uRight] || !Ready[uLeft])
			continue;

		MSA *ptrLeft = MSAs[uLeft];
		MSA *ptrRight = MSAs[uRight];
		assert(ptrLeft != 0 && ptrRight != 0);

		MSA *ptrParent = new MSA;

		PWPath Path;
		AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);

		MSAs[uNodeIndex] = ptrParent;
		Ready[uNodeIndex] = true;
		Ready[uLeft] = false;
		Ready[uRight] = false;

		delete MSAs[uLeft];
		delete MSAs[uRight];
		MSAs[uLeft] = 0;
		MSAs[uRight] = 0;
		}

#if	DEBUG
	{
	unsigned uReadyCount = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (Ready[uNodeIndex])
			{
			assert(tree.IsRoot(uNodeIndex));
			++uReadyCount;
			assert(0 != MSAs[uNodeIndex]);
			}
		else
			assert(0 == MSAs[uNodeIndex]);
		}
	assert(1 == uReadyCount);
	}
#endif

	const unsigned uRoot = tree.GetRootNodeIndex();
	MSA *ptrRootAlignment = MSAs[uRoot];

	msa.Copy(*ptrRootAlignment);

	delete ptrRootAlignment;

#if	TRACE
	Log("After refine subfamilies, root alignment=\n");
	msa.LogMe();
#endif
	}
コード例 #7
0
ファイル: objscore2.cpp プロジェクト: Unode/ext_apps
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])
	{
#if	TRACE
	Log("==================ObjScoreSP==============\n");
	Log("msa=\n");
	msa.LogMe();
#endif
	g_SPScoreLetters = 0;
	g_SPScoreGaps = 0;

	if (0 != MatchScore)
		{
		const unsigned uColCount = msa.GetColCount();
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			MatchScore[uColIndex] = 0;
		}

	const unsigned uSeqCount = msa.GetSeqCount();
	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
#if	TRACE
	Log("Seq1  Seq2     wt1     wt2    Letters         Gaps  Unwt.Score    Wt.Score       Total\n");
	Log("----  ----  ------  ------  ----------  ----------  ----------  ----------  ----------\n");
#endif
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;

			SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);
			SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);
			SCORE scorePair = scoreLetters + scoreGaps;
			++uPairCount;

			scoreTotal += w*scorePair;

			g_SPScoreLetters += w*scoreLetters;
			g_SPScoreGaps += w*scoreGaps;
#if	TRACE
			Log("%4d  %4d  %6.3f  %6.3f  %10.2f  %10.2f  %10.2f  %10.2f  %10.2f >%s >%s\n",
			  uSeqIndex1,
			  uSeqIndex2,
			  w1,
			  w2,
			  scoreLetters,
			  scoreGaps,
			  scorePair,
			  scorePair*w1*w2,
			  scoreTotal,
			  msa.GetSeqName(uSeqIndex1),
			  msa.GetSeqName(uSeqIndex2));
#endif
			}
		}
#if	TEST_SPFAST
	{
	SCORE f = ObjScoreSPFast(msa);
	Log("Fast  = %.6g\n", f);
	Log("Brute = %.6g\n", scoreTotal);
	if (BTEq(f, scoreTotal))
		Log("Agree\n");
	else
		Log("** DISAGREE **\n");
	}
#endif
//	return scoreTotal / uPairCount;
	return scoreTotal;
	}
コード例 #8
0
ファイル: objscore2.cpp プロジェクト: Unode/ext_apps
SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairGaps, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairGaps\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		if (bGap1 && bGap2)
			continue;

		if (bGap1)
			{
			if (!bGapping1)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 1 col %d\n", uColIndex);
#endif
				if (uColIndex == uColStart)
					scoreGaps += TermGapScore(true);
				else
					scoreGaps += g_scoreGapOpen;
				bGapping1 = true;
				}
			else
				scoreGaps += g_scoreGapExtend;
			continue;
			}

		else if (bGap2)
			{
			if (!bGapping2)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 2 col %d\n", uColIndex);
#endif
				if (uColIndex == uColStart)
					scoreGaps += TermGapScore(true);
				else
					scoreGaps += g_scoreGapOpen;
				bGapping2 = true;
				}
			else
				scoreGaps += g_scoreGapExtend;
			continue;
			}

		bGapping1 = false;
		bGapping2 = false;
		}

	if (bGapping1 || bGapping2)
		{
		scoreGaps -= g_scoreGapOpen;
		scoreGaps += TermGapScore(true);
		}
	return scoreGaps;
	}
コード例 #9
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
/* a version of ScoreSeqPairGaps that computes a per-residue score */
SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2, SCORE MatchScore[] )
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairGaps, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairGaps\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	unsigned gap_left_col = 0;
	SCORE cur_gap_score = 0;
	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		if (bGap1 && bGap2)
			continue;

		if (bGap1)
			{
			if (!bGapping1)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 1 col %d\n", uColIndex);
#endif
				gap_left_col = uColIndex;
				if (uColIndex == uColStart)
					{
					scoreGaps += TermGapScore(true);
					cur_gap_score += TermGapScore(true);
				}else{
					scoreGaps += g_scoreGapOpen.get();
					cur_gap_score += g_scoreGapOpen.get();
					}
				bGapping1 = true;
				}
			else
				{
				scoreGaps += g_scoreGapExtend.get();
				cur_gap_score += g_scoreGapExtend.get();
				}
			continue;
			}

		else if (bGap2)
			{
			if (!bGapping2)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 2 col %d\n", uColIndex);
#endif
				gap_left_col = uColIndex;
				if (uColIndex == uColStart)
					{
					scoreGaps += TermGapScore(true);
					cur_gap_score += TermGapScore(true);
				}else{
					scoreGaps += g_scoreGapOpen.get();
					cur_gap_score += g_scoreGapOpen.get();
					}
				bGapping2 = true;
				}
			else
				{
				scoreGaps += g_scoreGapExtend.get();
				cur_gap_score += g_scoreGapExtend.get();
				}
			continue;
			}

		if( MatchScore != NULL && (bGapping1 || bGapping2) )
		{
			// spread the total gap penalty evenly across all columns
			SCORE per_site_penalty = cur_gap_score / (uColIndex-gap_left_col);
			for( unsigned uGapIndex = gap_left_col; uGapIndex < uColIndex; ++uGapIndex )
				{
				MatchScore[uGapIndex] = per_site_penalty;
				}
			gap_left_col = uInsane;
			cur_gap_score = 0;
		}
		bGapping1 = false;
		bGapping2 = false;
		}

	if (bGapping1 || bGapping2)
		{
		scoreGaps -= g_scoreGapOpen.get();
		scoreGaps += TermGapScore(true);
		cur_gap_score -= g_scoreGapOpen.get();
		cur_gap_score += TermGapScore(true);

		if( MatchScore != NULL )
			{
			// spread the total gap penalty evenly across all columns
			SCORE per_site_penalty = cur_gap_score / (uColCount-gap_left_col);
			for( unsigned uGapIndex = gap_left_col; uGapIndex < uColCount; ++uGapIndex )
				{
				MatchScore[uGapIndex] = per_site_penalty;
				}
			}
		}
	return scoreGaps;
	}
コード例 #10
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
// Return true if any changes made
void AnchoredProfileProfile(MSA &msa1, MSA &msa2, MSA &msaOut)
	{

	const unsigned uColCountIn = msa1.GetColCount();
	const unsigned uSeqCountIn = msa1.GetSeqCount() + msa2.GetSeqCount();

	unsigned *AnchorCols = new unsigned[uColCountIn];
	unsigned uAnchorColCount;

	PrepareMSAforScoring(msa1);
	PrepareMSAforScoring(msa2);
	FindAnchorColsPP(msa1, msa2, AnchorCols, &uAnchorColCount);

	const unsigned uRangeCount = uAnchorColCount + 1;
	Range *Ranges = new Range[uRangeCount];

#if	TRACE
	Log("%u ranges\n", uRangeCount);
#endif

	ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
	ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);

#if	TRACE
	{
	Log("Anchor cols: ");
	for (unsigned i = 0; i < uAnchorColCount; ++i)
		Log(" %u", AnchorCols[i]);
	Log("\n");

	Log("Ranges:\n");
	for (unsigned i = 0; i < uRangeCount; ++i)
		Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
	}
#endif

	delete[] AnchorCols;

	msaOut.SetSize(uSeqCountIn, 0);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
		{
		const char *ptrName;
		unsigned uId;
		if( uSeqIndex < msa1.GetSeqCount() )
			{
			msa1.SetSeqId(uSeqIndex, uSeqIndex);
			ptrName = msa1.GetSeqName(uSeqIndex);
			}
		else
			{
			msa2.SetSeqId(uSeqIndex-msa1.GetSeqCount(), uSeqIndex);
			ptrName = msa2.GetSeqName(uSeqIndex-msa1.GetSeqCount());
			}
		msaOut.SetSeqName(uSeqIndex, ptrName);
		msaOut.SetSeqId(uSeqIndex, uSeqIndex);
		}

	for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
		{
		MSA msaRange1;
		MSA msaRange2;
		MSA msaRangeOut;

		const Range &r = Ranges[uRangeIndex];

		const unsigned uFromColIndex = r.m_uBestColLeft;
		const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;

		if (0 == uRangeColCount)
			continue;
/*		else if (1 == uRangeColCount)
			{
			MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
			MSAAppend(msaOut, msaRange);
			continue;
			}
*/
		MSAFromColRange(msa1, uFromColIndex, uRangeColCount, msaRange1);
		MSAFromColRange(msa2, uFromColIndex, uRangeColCount, msaRange2);
		StripGapColumns(msaRange1);
		StripGapColumns(msaRange2);

#if	TRACE
		Log("\n-------------\n");
		Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
		Log("Before:\n");
		msaRange1.LogMe();
		msaRange2.LogMe();
#endif

		ProfileProfile(msaRange1, msaRange2, msaRangeOut);

#if	TRACE
		Log("After:\n");
		msaRangeOut.LogMe();
#endif
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
			msaRangeOut.SetSeqId(uSeqIndex, uSeqIndex);

		MSAAppend(msaOut, msaRangeOut);

#if	TRACE
		Log("msaOut after Cat:\n");
		msaOut.LogMe();
#endif
		}

	delete[] Ranges;
	}
コード例 #11
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
// this is a version of the profile x profile score that computes
// a per-site score suitable for use with anchoring heuristics
SCORE LetterObjScoreXP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])
	{
	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount1 != uColCount2)
		Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);

	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();

#if	TRACE
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif

	SCORE* mmScore = NULL;
	SCORE* ggScore = NULL;
	if( MatchScore != NULL )
		{
		mmScore = new SCORE[uColCount1];
		ggScore = new SCORE[uColCount1];
		memset( MatchScore, 0, sizeof(SCORE)*uColCount1 );
		}

	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
			{
			if( mmScore != NULL )
				memset( mmScore, 0, sizeof(SCORE)*uColCount1 );
			if( ggScore != NULL )
				memset( ggScore, 0, sizeof(SCORE)*uColCount1 );
			const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;
			SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2, mmScore);
			SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2, ggScore);
			SCORE scorePair = scoreLetters + scoreGaps;
			scoreTotal += w*scorePair;
			++uPairCount;
			if( MatchScore != NULL )
				for( unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex )
					MatchScore[uColIndex] += w*(mmScore[uColIndex]+ggScore[uColIndex]);
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  >%s >%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  msa1.GetSeqName(uSeqIndex1),
			  msa2.GetSeqName(uSeqIndex2));
#endif
			}
		}
	if (0 == uPairCount)
		Quit("0 == uPairCount");

#if	TRACE
	Log("msa1=\n");
	msa1.LogMe();
	Log("msa2=\n");
	msa2.LogMe();
	Log("XP=%g\n", scoreTotal);
#endif
//	return scoreTotal / uPairCount;
	if(	mmScore != NULL )
		delete[] mmScore;
	if( ggScore != NULL )
		delete[] ggScore;

	return scoreTotal;
	}