コード例 #1
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// The XP score is the sum of the score of each pair of
// sequences between two profiles which are aligned to
// each other. Notice that for two given profiles aligned
// in different ways, the difference in XP score must be
// the same as the difference in SP score because the
// score of a pair of sequences in one profile doesn't
// depend on the alignment.
SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)
	{
	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount1 != uColCount2)
		Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);

	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();

#if	TRACE
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif

	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;
			SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2);
			SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2);
			SCORE scorePair = scoreLetters + scoreGaps;
			scoreTotal += w1*w2*scorePair;
			++uPairCount;
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  >%s >%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  msa1.GetSeqName(uSeqIndex1),
			  msa2.GetSeqName(uSeqIndex2));
#endif
			}
		}
	if (0 == uPairCount)
		Quit("0 == uPairCount");

#if	TRACE
	Log("msa1=\n");
	msa1.LogMe();
	Log("msa2=\n");
	msa2.LogMe();
	Log("XP=%g\n", scoreTotal);
#endif
//	return scoreTotal / uPairCount;
	return scoreTotal;
	}
コード例 #2
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,
  MSA &msaOut)
	{
	const unsigned uSeqCount = msaIn.GetSeqCount();
	const unsigned uInColCount = msaIn.GetColCount();

	if (uFromColIndex + uColCount - 1 > uInColCount)
		Quit("MSAFromColRange, out of bounds");

	msaOut.SetSize(uSeqCount, uColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uSeqIndex);
		unsigned uId = msaIn.GetSeqId(uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);
		msaOut.SetSeqId(uSeqIndex, uId);

		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uSeqIndex, uFromColIndex + uColIndex);
			msaOut.SetChar(uSeqIndex, uColIndex, c);
			}
		}
	}
コード例 #3
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void AssertMSAEq(const MSA &msa1, const MSA &msa2)
	{
	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();
	if (uSeqCount1 != uSeqCount2)
		Quit("Seq count differs");

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount1; ++uSeqIndex)
		{
		Seq seq1;
		msa1.GetSeq(uSeqIndex, seq1);

		unsigned uId = msa1.GetSeqId(uSeqIndex);
		unsigned uSeqIndex2 = msa2.GetSeqIndex(uId);

		Seq seq2;
		msa2.GetSeq(uSeqIndex2, seq2);

		if (!seq1.Eq(seq2))
			{
			Log("Input:\n");
			seq1.LogMe();
			Log("Output:\n");
			seq2.LogMe();
			Quit("Seq %s differ ", msa1.GetSeqName(uSeqIndex));
			}
		}
	}
コード例 #4
0
ファイル: objscoreda.cpp プロジェクト: Wyss/mauve-py
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
#if	TRACE
	msa.LogMe();
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif
	SCORE TotalLetters = 0;
	SCORE TotalGaps = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;
			SCORE Letters;
			SCORE Gaps;
			SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2,
			  &Letters, &Gaps);
			scoreTotal += w1*w2*scorePair;
			TotalLetters += w1*w2*Letters;
			TotalGaps += w1*w2*Gaps;
			++uPairCount;
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  %d=%s %d=%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  uSeqIndex1,
			  msa.GetSeqName(uSeqIndex1),
			  uSeqIndex2,
			  msa.GetSeqName(uSeqIndex2));
#endif
			}
		}
	*ptrLetters = TotalLetters;
	*ptrGaps = TotalGaps;
	return scoreTotal;
	}
コード例 #5
0
ファイル: subfam.cpp プロジェクト: Wyss/mauve-py
void AlignSubFam(SeqVect &vAll, const Tree &GuideTree, unsigned uNodeIndex,
  MSA &msaOut)
	{
	const unsigned uSeqCount = vAll.GetSeqCount();

	const char *InTmp = "asf_in.tmp";
	const char *OutTmp = "asf_out.tmp";

	unsigned *Leaves = new unsigned[uSeqCount];
	unsigned uLeafCount;
	GetLeaves(GuideTree, uNodeIndex, Leaves, &uLeafCount);

	SeqVect v;
	for (unsigned i = 0; i < uLeafCount; ++i)
		{
		unsigned uLeafNodeIndex = Leaves[i];
		unsigned uId = GuideTree.GetLeafId(uLeafNodeIndex);
		Seq &s = vAll.GetSeqById(uId);
		v.AppendSeq(s);
		}

#if	TRACE
	{
	Log("Align subfam[node=%d, size=%d] ", uNodeIndex, uLeafCount);
	for (unsigned i = 0; i < uLeafCount; ++i)
		Log(" %s", v.GetSeqName(i));
	Log("\n");
	}
#endif

	TextFile fIn(InTmp, true);

	v.ToFASTAFile(fIn);
	fIn.Close();

	char CmdLine[4096];
	sprintf(CmdLine, "probcons %s > %s 2> /dev/null", InTmp, OutTmp);
//	sprintf(CmdLine, "muscle -in %s -out %s -maxiters 1", InTmp, OutTmp);
	system(CmdLine);

	TextFile fOut(OutTmp);
	msaOut.FromFile(fOut);

	for (unsigned uSeqIndex = 0; uSeqIndex < uLeafCount; ++uSeqIndex)
		{
		const char *Name = msaOut.GetSeqName(uSeqIndex);
		unsigned uId = vAll.GetSeqIdFromName(Name);
		msaOut.SetSeqId(uSeqIndex, uId);
		}

	unlink(InTmp);
	unlink(OutTmp);

	delete[] Leaves;
	}
コード例 #6
0
ファイル: refinew.cpp プロジェクト: Wyss/mauve-py
static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom,
  unsigned uColTo, Seq &s)
	{
	s.Clear();
	s.SetName(msa.GetSeqName(uSeqIndex));
	s.SetId(msa.GetSeqId(uSeqIndex));
	for (unsigned uColIndex = uColFrom; uColIndex <= uColTo; ++uColIndex)
		{
		char c = msa.GetChar(uSeqIndex, uColIndex);
		if (!IsGapChar(c))
			s.AppendChar(c);
		}
	}
コード例 #7
0
ファイル: MuscleUtils.cpp プロジェクト: ggrekhov/ugene
void convertMSA2MAlignment(MSA& msa, const DNAAlphabet* al, MultipleSequenceAlignment& res) {
    assert(res->isEmpty());
    MuscleContext *ctx = getMuscleContext();
    res->setAlphabet(al);
    ctx->output_uIds.clear();

    for(int i=0, n = msa.GetSeqCount(); i < n; i++) {
        QString name = msa.GetSeqName(i);
        QByteArray seq;
        seq.reserve(msa.GetColCount());
        for (int j = 0, m = msa.GetColCount(); j < m ; j++) {
            char c = msa.GetChar(i, j);
            seq.append(c);
        }
        ctx->output_uIds.append(ctx->tmp_uIds[msa.GetSeqId(i)]);
        res->addRow(name, seq);
    }
}
コード例 #8
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromSeqRange(const MSA &msaIn, unsigned uFromSeqIndex, unsigned uSeqCount,
  MSA &msaOut)
	{
	const unsigned uColCount = msaIn.GetColCount();
	msaOut.SetSize(uSeqCount, uColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uFromSeqIndex + uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);

		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uFromSeqIndex + uSeqIndex, uColIndex);
			msaOut.SetChar(uSeqIndex, uColIndex, c);
			}
		}
	}
コード例 #9
0
ファイル: stabilize.cpp プロジェクト: bigmuscle/bigmuscle
void Stabilize(const MSA &msa, MSA &msaStable)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();

	msaStable.SetSize(uSeqCount, uColCount);
	for (unsigned uId = 0; uId < uSeqCount; ++uId)
		{
		const unsigned uSeqIndex = msa.GetSeqIndex(uId);
		msaStable.SetSeqName(uId, msa.GetSeqName(uSeqIndex));
		msaStable.SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msa.GetChar(uSeqIndex, uColIndex);
			msaStable.SetChar(uId, uColIndex, c);
			}
		}
	}
コード例 #10
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,
  MSA &msaOut)
	{
	const unsigned uColCount = msaIn.GetColCount();
	msaOut.SetSize(uSeqCount, uColCount);
	for (unsigned uSeqIndexOut = 0; uSeqIndexOut < uSeqCount; ++uSeqIndexOut)
		{
		unsigned uSeqIndexIn = uSeqIndexes[uSeqIndexOut];
		const char *ptrName = msaIn.GetSeqName(uSeqIndexIn);
		unsigned uId = msaIn.GetSeqId(uSeqIndexIn);
		msaOut.SetSeqName(uSeqIndexOut, ptrName);
		msaOut.SetSeqId(uSeqIndexOut, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uSeqIndexIn, uColIndex);
			msaOut.SetChar(uSeqIndexOut, uColIndex, c);
			}
		}
	}
コード例 #11
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void SeqVectFromMSA(const MSA &msa, SeqVect &v)
	{
	v.Clear();
	const unsigned uSeqCount = msa.GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq s;
		msa.GetSeq(uSeqIndex, s);

		s.StripGaps();
		//if (0 == s.Length())
		//	continue;

		const char *ptrName = msa.GetSeqName(uSeqIndex);
		s.SetName(ptrName);

		v.AppendSeq(s);
		}
	}
コード例 #12
0
ファイル: msa2.cpp プロジェクト: ggrekhov/ugene
void SetThreeWayWeightsMuscle(MSA &msa)
	{
    MuscleContext *ctx =getMuscleContext();
    const Tree* &g_ptrMuscleTree = ctx->msa2.g_ptrMuscleTree;
    unsigned &g_uTreeSplitNode1 = ctx->msa2.g_uTreeSplitNode1;
    unsigned &g_uTreeSplitNode2 = ctx->msa2.g_uTreeSplitNode2;

	if (NULL_NEIGHBOR == g_uTreeSplitNode1 || NULL_NEIGHBOR == g_uTreeSplitNode2)
		{
		msa.SetHenikoffWeightsPB();
		return;
		}

	const unsigned uMuscleSeqCount = g_ptrMuscleTree->GetLeafCount();
	WEIGHT *Weights = new WEIGHT[uMuscleSeqCount];

	CalcThreeWayWeights(*g_ptrMuscleTree, g_uTreeSplitNode1, g_uTreeSplitNode2,
	  Weights);

	const unsigned uMSASeqCount = msa.GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uMSASeqCount; ++uSeqIndex)
		{
		const unsigned uId = msa.GetSeqId(uSeqIndex);
		if (uId >= uMuscleSeqCount)
			Quit("SetThreeWayWeightsMuscle: id out of range");
		msa.SetSeqWeight(uSeqIndex, Weights[uId]);
		}
#if	LOCAL_VERBOSE
	{
	Log("SetThreeWayWeightsMuscle\n");
	for (unsigned n = 0; n < uMSASeqCount; ++n)
		{
		const unsigned uId = msa.GetSeqId(n);
		Log("%20.20s %6.3f\n", msa.GetSeqName(n), Weights[uId]);
		}
	}
#endif
	msa.NormalizeWeights((WEIGHT) 1.0);

	delete[] Weights;
	}
コード例 #13
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
// "Catenate" two MSAs (by bad analogy with UNIX cat command).
// msa1 and msa2 must have same sequence names, but possibly
// in a different order.
// msaCat is the combined alignment produce by appending
// sequences in msa2 to sequences in msa1.
void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat)
	{
	const unsigned uSeqCount = msa1.GetSeqCount();

	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	const unsigned uColCountCat = uColCount1 + uColCount2;

	msaCat.SetSize(uSeqCount, uColCountCat);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		for (unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex)
			{
			const char c = msa1.GetChar(uSeqIndex, uColIndex);
			msaCat.SetChar(uSeqIndex, uColIndex, c);
			}

		const char *ptrSeqName = msa1.GetSeqName(uSeqIndex);
		unsigned uSeqIndex2;
		msaCat.SetSeqName(uSeqIndex, ptrSeqName);
		bool bFound = msa2.GetSeqIndex(ptrSeqName, &uSeqIndex2);
		if (bFound)
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				{
				const char c = msa2.GetChar(uSeqIndex2, uColIndex);
				msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
				}
			}
		else
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');
			}
		}
	}
コード例 #14
0
ファイル: refinew.cpp プロジェクト: bigmuscle/bigmuscle
void RefineW(const MSA &msaIn, MSA &msaOut)
	{
	const unsigned uSeqCount = msaIn.GetSeqCount();
	const unsigned uColCount = msaIn.GetColCount();

// Reserve same nr seqs, 20% more cols
	const unsigned uReserveColCount = (uColCount*120)/100;
	msaOut.SetSize(uSeqCount, uReserveColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));
		msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));
		}

	const unsigned uWindowCount = (uColCount + g_uRefineWindow - 1)/g_uRefineWindow;
	if (0 == g_uWindowTo)
		g_uWindowTo = uWindowCount - 1;

#if	MEMDEBUG
	_CrtSetBreakAlloc(1560);
#endif

	if (g_uWindowOffset > 0)
		{
		MSA msaTmp;
		MSAFromColRange(msaIn, 0, g_uWindowOffset, msaOut);
		}

	fprintf(stderr, "\n");
	for (unsigned uWindowIndex = g_uWindowFrom; uWindowIndex <= g_uWindowTo; ++uWindowIndex)
		{
		fprintf(stderr, "Window %d of %d    \r", uWindowIndex, uWindowCount);
		const unsigned uColFrom = g_uWindowOffset + uWindowIndex*g_uRefineWindow;
		unsigned uColTo = uColFrom + g_uRefineWindow - 1;
		if (uColTo >= uColCount)
			uColTo = uColCount - 1;
		assert(uColTo >= uColFrom);

		SeqVect v;
		SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);

#if	MEMDEBUG
		_CrtMemState s1;
		_CrtMemCheckpoint(&s1);
#endif

		MSA msaTmp;
		MUSCLE(v, msaTmp);
		AppendMSA(msaOut, msaTmp);
		if (uWindowIndex == g_uSaveWindow)
			{
			MSA msaInTmp;
			unsigned uOutCols = msaOut.GetColCount();
			unsigned un = uColTo - uColFrom + 1;
			MSAFromColRange(msaIn, uColFrom, un, msaInTmp);

			char fn[256];
			sprintf(fn, "win%d_inaln.tmp", uWindowIndex);
			TextFile fIn(fn, true);
			msaInTmp.ToFile(fIn);

			sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);
			TextFile fv(fn, true);
			v.ToFile(fv);

			sprintf(fn, "win%d_outaln.tmp", uWindowIndex);
			TextFile fOut(fn, true);
			msaTmp.ToFile(fOut);
			}

#if	MEMDEBUG
		void FreeDPMemSPN();
		FreeDPMemSPN();

		_CrtMemState s2;
		_CrtMemCheckpoint(&s2);

		_CrtMemState s;
		_CrtMemDifference(&s, &s1, &s2);

		_CrtMemDumpStatistics(&s);
		_CrtMemDumpAllObjectsSince(&s1);
		exit(1);
#endif
//#if	DEBUG
//		AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);
//#endif
		}
	fprintf(stderr, "\n");

//	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!
	}
コード例 #15
0
ファイル: scoregaps.cpp プロジェクト: ggrekhov/ugene
SCORE ScoreGaps(const MSA &msa, const unsigned DiffCols[], unsigned DiffColCount)
	{
    MuscleContext *ctx = getMuscleContext();
    unsigned &g_ColCount = ctx->scoregaps.g_ColCount;
    unsigned &g_MaxSeqCount = ctx->scoregaps.g_MaxSeqCount;
    unsigned &g_MaxColCount = ctx->scoregaps.g_MaxColCount;
    GAPINFO** &g_Gaps = ctx->scoregaps.g_Gaps;
    bool* &g_ColDiff = ctx->scoregaps.g_ColDiff;
    
#if	TRACE
	{
	Log("ScoreGaps\n");
	Log("DiffCols ");
	for (unsigned i = 0; i < DiffColCount; ++i)
		Log(" %u", DiffCols[i]);
	Log("\n");
	Log("msa=\n");
	msa.LogMe();
	Log("\n");
	}
#endif
	const unsigned SeqCount = msa.GetSeqCount();
	const unsigned ColCount = msa.GetColCount();
	g_ColCount = ColCount;

	if (SeqCount > g_MaxSeqCount)
		{
		delete[] g_Gaps;
		g_MaxSeqCount = SeqCount + 256;
		g_Gaps = new GAPINFO *[g_MaxSeqCount];
		}
	memset(g_Gaps, 0, SeqCount*sizeof(GAPINFO *));

	if (ColCount > g_MaxColCount)
		{
		delete[] g_ColDiff;
		g_MaxColCount = ColCount + 256;
		g_ColDiff = new bool[g_MaxColCount];
		}

	memset(g_ColDiff, 0, g_ColCount*sizeof(bool));
	for (unsigned i = 0; i < DiffColCount; ++i)
		{
		unsigned Col = DiffCols[i];
		assert(Col < ColCount);
		g_ColDiff[Col] = true;
		}

	for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
		FindIntersectingGaps(msa, SeqIndex);

#if	TRACE
	{
	Log("\n");
	Log("Intersecting gaps:\n");
	Log("      ");
	for (unsigned Col = 0; Col < ColCount; ++Col)
		Log("%c", g_ColDiff[Col] ? '*' : ' ');
	Log("\n");
	Log("      ");
	for (unsigned Col = 0; Col < ColCount; ++Col)
		Log("%d", Col%10);
	Log("\n");
	for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
		{
		Log("%3d:  ", Seq);
		for (unsigned Col = 0; Col < ColCount; ++Col)
			Log("%c", msa.GetChar(Seq, Col));
		Log("  :: ");
		for (GAPINFO *GI = g_Gaps[Seq]; GI; GI = GI->Next)
			Log(" (%d,%d)", GI->Start, GI->End);
		Log("  >%s\n", msa.GetSeqName(Seq));
		}
	Log("\n");
	}
#endif

	SCORE Score = 0;
	for (unsigned Seq1 = 0; Seq1 < SeqCount; ++Seq1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(Seq1);
		for (unsigned Seq2 = Seq1 + 1; Seq2 < SeqCount; ++Seq2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(Seq2);
//			const SCORE Pair = ScorePair(Seq1, Seq2);
			const SCORE Pair = ScoreSeqPairGaps(msa, Seq1, msa, Seq2);
			Score += w1*w2*Pair;
#if	TRACE
			Log("Seq1=%u Seq2=%u ScorePair=%.4g w1=%.4g w2=%.4g Sum=%.4g\n",
			  Seq1, Seq2, Pair, w1, w2, Score);
#endif
			}
		}

	return Score;
	}
コード例 #16
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])
	{
#if	TRACE
	Log("==================ObjScoreSP==============\n");
	Log("msa=\n");
	msa.LogMe();
#endif
	g_SPScoreLetters = 0;
	g_SPScoreGaps = 0;

	if (0 != MatchScore)
		{
		const unsigned uColCount = msa.GetColCount();
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			MatchScore[uColIndex] = 0;
		}

	const unsigned uSeqCount = msa.GetSeqCount();
	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
#if	TRACE
	Log("Seq1  Seq2     wt1     wt2    Letters         Gaps  Unwt.Score    Wt.Score       Total\n");
	Log("----  ----  ------  ------  ----------  ----------  ----------  ----------  ----------\n");
#endif
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;

			SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);
			SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);
			SCORE scorePair = scoreLetters + scoreGaps;
			++uPairCount;

			scoreTotal += w*scorePair;

			g_SPScoreLetters += w*scoreLetters;
			g_SPScoreGaps += w*scoreGaps;
#if	TRACE
			Log("%4d  %4d  %6.3f  %6.3f  %10.2f  %10.2f  %10.2f  %10.2f  %10.2f >%s >%s\n",
			  uSeqIndex1,
			  uSeqIndex2,
			  w1,
			  w2,
			  scoreLetters,
			  scoreGaps,
			  scorePair,
			  scorePair*w1*w2,
			  scoreTotal,
			  msa.GetSeqName(uSeqIndex1),
			  msa.GetSeqName(uSeqIndex2));
#endif
			}
		}
#if	TEST_SPFAST
	{
	SCORE f = ObjScoreSPFast(msa);
	Log("Fast  = %.6g\n", f);
	Log("Brute = %.6g\n", scoreTotal);
	if (BTEq(f, scoreTotal))
		Log("Agree\n");
	else
		Log("** DISAGREE **\n");
	}
#endif
//	return scoreTotal / uPairCount;
	return scoreTotal;
	}
コード例 #17
0
ファイル: refinevert.cpp プロジェクト: bigmuscle/bigmuscle
// Return true if any changes made
bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters)
	{
	bool bAnyChanges = false;

	const unsigned uColCountIn = msaIn.GetColCount();
	const unsigned uSeqCountIn = msaIn.GetSeqCount();

	if (uColCountIn < 3 || uSeqCountIn < 3)
		return false;

	unsigned *AnchorCols = new unsigned[uColCountIn];
	unsigned uAnchorColCount;
	SetMSAWeightsMuscle(msaIn);
	FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount);

	const unsigned uRangeCount = uAnchorColCount + 1;
	Range *Ranges = new Range[uRangeCount];

#if	TRACE
	Log("%u ranges\n", uRangeCount);
#endif

	ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
	ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);

#if	TRACE
	{
	Log("Anchor cols: ");
	for (unsigned i = 0; i < uAnchorColCount; ++i)
		Log(" %u", AnchorCols[i]);
	Log("\n");

	Log("Ranges:\n");
	for (unsigned i = 0; i < uRangeCount; ++i)
		Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
	}
#endif

	delete[] AnchorCols;

	MSA msaOut;
	msaOut.SetSize(uSeqCountIn, 0);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uSeqIndex);
		unsigned uId = msaIn.GetSeqId(uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);
		msaOut.SetSeqId(uSeqIndex, uId);
		}

	for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
		{
		MSA msaRange;

		const Range &r = Ranges[uRangeIndex];

		const unsigned uFromColIndex = r.m_uBestColLeft;
		const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;

		if (0 == uRangeColCount)
			continue;
		else if (1 == uRangeColCount)
			{
			MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
			MSAAppend(msaOut, msaRange);
			continue;
			}
		MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange);

#if	TRACE
		Log("\n-------------\n");
		Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
		Log("Before:\n");
		msaRange.LogMe();
#endif

		bool bLockLeft = (0 != uRangeIndex);
		bool bLockRight = (uRangeCount - 1 != uRangeIndex);
		bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight);
		bAnyChanges = (bAnyChanges || bAnyChangesThisBlock);

#if	TRACE
		Log("After:\n");
		msaRange.LogMe();
#endif

		MSAAppend(msaOut, msaRange);

#if	TRACE
		Log("msaOut after Cat:\n");
		msaOut.LogMe();
#endif
		}

#if	DEBUG
// Sanity check
	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
#endif

	delete[] Ranges;
	if (bAnyChanges)
		msaIn.Copy(msaOut);
	return bAnyChanges;
	}
コード例 #18
0
ファイル: aligngivenpathsw.cpp プロジェクト: Wyss/mauve-py
void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB,
  MSA &msaCombined)
	{
	msaCombined.Clear();

#if	TRACE
	Log("AlignTwoMSAsGivenPathSW\n");
	Log("Template A:\n");
	msaA.LogMe();
	Log("Template B:\n");
	msaB.LogMe();
#endif

	const unsigned uColCountA = msaA.GetColCount();
	const unsigned uColCountB = msaB.GetColCount();

	const unsigned uSeqCountA = msaA.GetSeqCount();
	const unsigned uSeqCountB = msaB.GetSeqCount();

	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);

// Copy sequence names into combined MSA
	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
		{
		msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
		msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
		}

	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
		{
		msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
		msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
		}

	unsigned uColIndexA = 0;
	unsigned uColIndexB = 0;
	unsigned uColIndexCombined = 0;
	const unsigned uEdgeCount = Path.GetEdgeCount();
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
#if	TRACE
		Log("\nEdge %u %c%u.%u\n",
		  uEdgeIndex,
		  Edge.cType,
		  Edge.uPrefixLengthA,
		  Edge.uPrefixLengthB);
#endif
		const char cType = Edge.cType;
		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
		unsigned uColCountA = 0;
		if (uPrefixLengthA > 0)
			{
			const unsigned uNodeIndexA = uPrefixLengthA - 1;
			const unsigned uTplColIndexA = uNodeIndexA;
			if (uTplColIndexA > uColIndexA)
				uColCountA = uTplColIndexA - uColIndexA;
			}

		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
		unsigned uColCountB = 0;
		if (uPrefixLengthB > 0)
			{
			const unsigned uNodeIndexB = uPrefixLengthB - 1;
			const unsigned uTplColIndexB = uNodeIndexB;
			if (uTplColIndexB > uColIndexB)
				uColCountB = uTplColIndexB - uColIndexB;
			}

		AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);

		switch (cType)
			{
		case 'M':
			{
			assert(uPrefixLengthA > 0);
			assert(uPrefixLengthB > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexA == uColA);
			assert(uColIndexB == uColB);
			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
			  msaCombined, uColIndexCombined);
			break;
			}
		case 'D':
			{
			assert(uPrefixLengthA > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			assert(uColIndexA == uColA);
			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		case 'I':
			{
			assert(uPrefixLengthB > 0);
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexB == uColB);
			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		default:
			assert(false);
			}
		}
	unsigned uInsertColCountA = uColCountA - uColIndexA;
	unsigned uInsertColCountB = uColCountB - uColIndexB;

	AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
	  uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
	}
コード例 #19
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
// Return true if any changes made
void AnchoredProfileProfile(MSA &msa1, MSA &msa2, MSA &msaOut)
	{

	const unsigned uColCountIn = msa1.GetColCount();
	const unsigned uSeqCountIn = msa1.GetSeqCount() + msa2.GetSeqCount();

	unsigned *AnchorCols = new unsigned[uColCountIn];
	unsigned uAnchorColCount;

	PrepareMSAforScoring(msa1);
	PrepareMSAforScoring(msa2);
	FindAnchorColsPP(msa1, msa2, AnchorCols, &uAnchorColCount);

	const unsigned uRangeCount = uAnchorColCount + 1;
	Range *Ranges = new Range[uRangeCount];

#if	TRACE
	Log("%u ranges\n", uRangeCount);
#endif

	ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
	ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);

#if	TRACE
	{
	Log("Anchor cols: ");
	for (unsigned i = 0; i < uAnchorColCount; ++i)
		Log(" %u", AnchorCols[i]);
	Log("\n");

	Log("Ranges:\n");
	for (unsigned i = 0; i < uRangeCount; ++i)
		Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
	}
#endif

	delete[] AnchorCols;

	msaOut.SetSize(uSeqCountIn, 0);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
		{
		const char *ptrName;
		unsigned uId;
		if( uSeqIndex < msa1.GetSeqCount() )
			{
			msa1.SetSeqId(uSeqIndex, uSeqIndex);
			ptrName = msa1.GetSeqName(uSeqIndex);
			}
		else
			{
			msa2.SetSeqId(uSeqIndex-msa1.GetSeqCount(), uSeqIndex);
			ptrName = msa2.GetSeqName(uSeqIndex-msa1.GetSeqCount());
			}
		msaOut.SetSeqName(uSeqIndex, ptrName);
		msaOut.SetSeqId(uSeqIndex, uSeqIndex);
		}

	for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
		{
		MSA msaRange1;
		MSA msaRange2;
		MSA msaRangeOut;

		const Range &r = Ranges[uRangeIndex];

		const unsigned uFromColIndex = r.m_uBestColLeft;
		const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;

		if (0 == uRangeColCount)
			continue;
/*		else if (1 == uRangeColCount)
			{
			MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
			MSAAppend(msaOut, msaRange);
			continue;
			}
*/
		MSAFromColRange(msa1, uFromColIndex, uRangeColCount, msaRange1);
		MSAFromColRange(msa2, uFromColIndex, uRangeColCount, msaRange2);
		StripGapColumns(msaRange1);
		StripGapColumns(msaRange2);

#if	TRACE
		Log("\n-------------\n");
		Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
		Log("Before:\n");
		msaRange1.LogMe();
		msaRange2.LogMe();
#endif

		ProfileProfile(msaRange1, msaRange2, msaRangeOut);

#if	TRACE
		Log("After:\n");
		msaRangeOut.LogMe();
#endif
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
			msaRangeOut.SetSeqId(uSeqIndex, uSeqIndex);

		MSAAppend(msaOut, msaRangeOut);

#if	TRACE
		Log("msaOut after Cat:\n");
		msaOut.LogMe();
#endif
		}

	delete[] Ranges;
	}
コード例 #20
0
ファイル: refinew.cpp プロジェクト: Wyss/mauve-py
void RefineW(const MSA &msaIn, MSA &msaOut)
	{
	const unsigned uSeqCount = msaIn.GetSeqCount();
	const unsigned uColCount = msaIn.GetColCount();

// Reserve same nr seqs, 20% more cols
	const unsigned uReserveColCount = (uColCount*120)/100;
	msaOut.SetSize(uSeqCount, uReserveColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));
		msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));
		}

	const unsigned uWindowCount = (uColCount + g_uRefineWindow.get() - 1)/g_uRefineWindow.get();
	if (0 == g_uWindowTo.get())
		g_uWindowTo.get() = uWindowCount - 1;

#if	MEMDEBUG
	_CrtSetBreakAlloc(1560);
#endif

	if (g_uWindowOffset.get() > 0)
		{
		MSA msaTmp;
		MSAFromColRange(msaIn, 0, g_uWindowOffset.get(), msaOut);
		}

	if (!g_bQuiet.get())
		fprintf(stderr, "\n");
	for (unsigned uWindowIndex = g_uWindowFrom.get(); uWindowIndex <= g_uWindowTo.get(); ++uWindowIndex)
		{
		if (!g_bQuiet.get())
			fprintf(stderr, "Window %d of %d    \r", uWindowIndex, uWindowCount);
		const unsigned uColFrom = g_uWindowOffset.get() + uWindowIndex*g_uRefineWindow.get();
		unsigned uColTo = uColFrom + g_uRefineWindow.get() - 1;
		if (uColTo >= uColCount)
			uColTo = uColCount - 1;
		assert(uColTo >= uColFrom);

		SeqVect v;
		SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);

#if	MEMDEBUG
		_CrtMemState s1;
		_CrtMemCheckpoint(&s1);
#endif
		// Begin AED 5/20/06
		// remove any empty seqs in this window
		std::vector< size_t > empty_seqs;
		SeqVect vr;
		for( size_t seqI = 0; seqI < v.size(); ++seqI )
		{
			if( v[seqI]->size() == 0 )
				empty_seqs.push_back(seqI);
			else
				vr.push_back(v[seqI]);
		}
		std::vector< unsigned > seqid_map( vr.size() );
		for( size_t seqI = 0; seqI < vr.size(); ++seqI )
		{
			seqid_map[seqI] = vr[seqI]->GetId();
			vr[seqI]->SetId(seqI);
		}

		MSA msaTmp;
		if( vr.size() > 1 )
			MUSCLE(vr, msaTmp);

		// remap the seqids to their original state
		for( size_t seqI = 0; seqI < vr.size(); ++seqI )
			vr[seqI]->SetId(seqid_map[seqI]);

		// merge empty seqs back in
		{
			const unsigned uSeqCount = msaOut.GetSeqCount();

			const unsigned uColCount1 = msaOut.GetColCount();
			const unsigned uColCount2 = vr.size() > 1 ? msaTmp.GetColCount() : vr[0]->size();
			const unsigned uColCountCat = uColCount1 + uColCount2;
			for( unsigned seqI = 0; seqI < vr.size(); ++seqI )
			{
				unsigned uSeqIndex = msaOut.GetSeqIndex(seqid_map[seqI]);
				if( vr.size() > 1 )
				{
					unsigned uSeqIndex2 = msaTmp.GetSeqIndex(seqI);
					for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
					{
						const char c = msaTmp.GetChar(uSeqIndex2, uColIndex);
						msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
					}
				}else{
					for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
					{
						const char c = vr[0]->GetChar(uColIndex);
						msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
					}
				}
			}
			for( unsigned seqI = 0; seqI < empty_seqs.size(); ++seqI )
			{
				unsigned uSeqId2 = v[empty_seqs[seqI]]->GetId();
				unsigned uSeqIndex = msaOut.GetSeqIndex(uSeqId2);
				for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				{
					msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');
				}
			}
			vr.clear();
		}
//		AppendMSA(msaOut, msaTmp);
		// end AED 5/20/06

		if (uWindowIndex == g_uSaveWindow.get())
			{
			MSA msaInTmp;
			unsigned uOutCols = msaOut.GetColCount();
			unsigned un = uColTo - uColFrom + 1;
			MSAFromColRange(msaIn, uColFrom, un, msaInTmp);

			char fn[256];
			sprintf(fn, "win%d_inaln.tmp", uWindowIndex);
			TextFile fIn(fn, true);
			msaInTmp.ToFile(fIn);

			sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);
			TextFile fv(fn, true);
			v.ToFile(fv);

			sprintf(fn, "win%d_outaln.tmp", uWindowIndex);
			TextFile fOut(fn, true);
			msaTmp.ToFile(fOut);
			}

#if	MEMDEBUG
		void FreeDPMemSPN();
		FreeDPMemSPN();

		_CrtMemState s2;
		_CrtMemCheckpoint(&s2);

		_CrtMemState s;
		_CrtMemDifference(&s, &s1, &s2);

		_CrtMemDumpStatistics(&s);
		_CrtMemDumpAllObjectsSince(&s1);
		exit(1);
#endif
//#if	DEBUG
//		AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);
//#endif
		}
	if (!g_bQuiet.get())
		fprintf(stderr, "\n");

//	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!
	}
コード例 #21
0
ファイル: objscoreda.cpp プロジェクト: Wyss/mauve-py
static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps)
	{
	g_ptrMSA1.get() = &msa1;
	g_ptrMSA2.get() = &msa2;
	g_uSeqIndex1.get() = uSeqIndex1;
	g_uSeqIndex2.get() = uSeqIndex2;

	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPair, different lengths");

#if	TRACE
	Log("ScoreSeqPair\n");
	Log("%16.16s  ", msa1.GetSeqName(uSeqIndex1));
	for (unsigned i = 0; i < uColCount; ++i)
		Log("%c", msa1.GetChar(uSeqIndex1, i));
	Log("\n");
	Log("%16.16s  ", msa2.GetSeqName(uSeqIndex2));
	for (unsigned i = 0; i < uColCount; ++i)
		Log("%c", msa1.GetChar(uSeqIndex2, i));
	Log("\n");
#endif

	SCORE scoreTotal = 0;

// Substitution scores
	unsigned uFirstLetter1 = uInsane;
	unsigned uFirstLetter2 = uInsane;
	unsigned uLastLetter1 = uInsane;
	unsigned uLastLetter2 = uInsane;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex);
		bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex);

		if (!bGap1)
			{
			if (uInsane == uFirstLetter1)
				uFirstLetter1 = uColIndex;
			uLastLetter1 = uColIndex;
			}
		if (!bGap2)
			{
			if (uInsane == uFirstLetter2)
				uFirstLetter2 = uColIndex;
			uLastLetter2 = uColIndex;
			}

		if (bGap1 || bGap2 || bWildcard1 || bWildcard2)
			continue;

		unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex);
		unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex);

		SCORE scoreMatch = (*g_ptrScoreMatrix.get())[uLetter1][uLetter2];
		scoreTotal += scoreMatch;
#if	TRACE
		Log("%c <-> %c = %7.1f  %10.1f\n",
		  msa1.GetChar(uSeqIndex1, uColIndex),
		  msa2.GetChar(uSeqIndex2, uColIndex),
		  scoreMatch,
		  scoreTotal);
#endif
		}
	
	*ptrLetters = scoreTotal;

// Gap penalties
	unsigned uGapLength = uInsane;
	unsigned uGapStartCol = uInsane;
	bool bGapping1 = false;
	bool bGapping2 = false;

	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		if (bGap1 && bGap2)
			continue;

		if (bGapping1)
			{
			if (bGap1)
				++uGapLength;
			else
				{
				bGapping1 = false;
				bool bNTerm = (uFirstLetter2 == uGapStartCol);
				bool bCTerm = (uLastLetter2 + 1 == uColIndex);
				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
				scoreTotal += scoreGap;
#if	TRACE
				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
				Log("GAP         %7.1f  %10.1f\n",
				  scoreGap,
				  scoreTotal);
#endif
				}
			continue;
			}
		else
			{
			if (bGap1)
				{
				uGapStartCol = uColIndex;
				bGapping1 = true;
				uGapLength = 1;
				continue;
				}
			}

		if (bGapping2)
			{
			if (bGap2)
				++uGapLength;
			else
				{
				bGapping2 = false;
				bool bNTerm = (uFirstLetter1 == uGapStartCol);
				bool bCTerm = (uLastLetter1 + 1 == uColIndex);
				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
				scoreTotal += scoreGap;
#if	TRACE
				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
				Log("GAP         %7.1f  %10.1f\n",
				  scoreGap,
				  scoreTotal);
#endif
				}
			}
		else
			{
			if (bGap2)
				{
				uGapStartCol = uColIndex;
				bGapping2 = true;
				uGapLength = 1;
				}
			}
		}

	if (bGapping1 || bGapping2)
		{
		SCORE scoreGap = GapPenalty(uGapLength, true);
		scoreTotal += scoreGap;
#if	TRACE
		LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true);
		Log("GAP         %7.1f  %10.1f\n",
		  scoreGap,
		  scoreTotal);
#endif
		}
	*ptrGaps = scoreTotal - *ptrLetters;
	return scoreTotal;
	}
コード例 #22
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
// this is a version of the profile x profile score that computes
// a per-site score suitable for use with anchoring heuristics
SCORE LetterObjScoreXP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])
	{
	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount1 != uColCount2)
		Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);

	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();

#if	TRACE
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif

	SCORE* mmScore = NULL;
	SCORE* ggScore = NULL;
	if( MatchScore != NULL )
		{
		mmScore = new SCORE[uColCount1];
		ggScore = new SCORE[uColCount1];
		memset( MatchScore, 0, sizeof(SCORE)*uColCount1 );
		}

	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
			{
			if( mmScore != NULL )
				memset( mmScore, 0, sizeof(SCORE)*uColCount1 );
			if( ggScore != NULL )
				memset( ggScore, 0, sizeof(SCORE)*uColCount1 );
			const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;
			SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2, mmScore);
			SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2, ggScore);
			SCORE scorePair = scoreLetters + scoreGaps;
			scoreTotal += w*scorePair;
			++uPairCount;
			if( MatchScore != NULL )
				for( unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex )
					MatchScore[uColIndex] += w*(mmScore[uColIndex]+ggScore[uColIndex]);
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  >%s >%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  msa1.GetSeqName(uSeqIndex1),
			  msa2.GetSeqName(uSeqIndex2));
#endif
			}
		}
	if (0 == uPairCount)
		Quit("0 == uPairCount");

#if	TRACE
	Log("msa1=\n");
	msa1.LogMe();
	Log("msa2=\n");
	msa2.LogMe();
	Log("XP=%g\n", scoreTotal);
#endif
//	return scoreTotal / uPairCount;
	if(	mmScore != NULL )
		delete[] mmScore;
	if( ggScore != NULL )
		delete[] ggScore;

	return scoreTotal;
	}