コード例 #1
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void AssertMSAEq(const MSA &msa1, const MSA &msa2)
	{
	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();
	if (uSeqCount1 != uSeqCount2)
		Quit("Seq count differs");

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount1; ++uSeqIndex)
		{
		Seq seq1;
		msa1.GetSeq(uSeqIndex, seq1);

		unsigned uId = msa1.GetSeqId(uSeqIndex);
		unsigned uSeqIndex2 = msa2.GetSeqIndex(uId);

		Seq seq2;
		msa2.GetSeq(uSeqIndex2, seq2);

		if (!seq1.Eq(seq2))
			{
			Log("Input:\n");
			seq1.LogMe();
			Log("Output:\n");
			seq2.LogMe();
			Quit("Seq %s differ ", msa1.GetSeqName(uSeqIndex));
			}
		}
	}
コード例 #2
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// The XP score is the sum of the score of each pair of
// sequences between two profiles which are aligned to
// each other. Notice that for two given profiles aligned
// in different ways, the difference in XP score must be
// the same as the difference in SP score because the
// score of a pair of sequences in one profile doesn't
// depend on the alignment.
//
// msa1, msa2: the two profiles; they must have the same number of columns,
// otherwise Quit() is called. Quit() is also called when no pair exists
// (either profile has no sequences).
// Returns the weighted sum over all (row of msa1, row of msa2) pairs of the
// letter score plus the gap score; the sum is not divided by the pair count.
SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)
	{
	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount1 != uColCount2)
		Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);

	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();

#if	TRACE
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif

	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);
			const SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2);
			const SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2);
			const SCORE scorePair = scoreLetters + scoreGaps;
			// Each pair contributes in proportion to both sequence weights.
			scoreTotal += w1*w2*scorePair;
			++uPairCount;
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  >%s >%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  msa1.GetSeqName(uSeqIndex1),
			  msa2.GetSeqName(uSeqIndex2));
#endif
			}
		}
	if (0 == uPairCount)
		Quit("0 == uPairCount");

#if	TRACE
	Log("msa1=\n");
	msa1.LogMe();
	Log("msa2=\n");
	msa2.LogMe();
	Log("XP=%g\n", scoreTotal);
#endif
	// Normalization by uPairCount is disabled; the raw total is returned.
	return scoreTotal;
	}
コード例 #3
0
ファイル: refinew.cpp プロジェクト: Wyss/mauve-py
// Append the columns of msa2 to the right of msa1.
// Rows are matched by sequence id. A row of msa1 whose id is not found in
// msa2 is padded with '-' for every appended column.
// msa1 is modified in place; msa2 is only read.
void AppendMSA(MSA &msa1, const MSA &msa2)
	{
	const unsigned uSeqCount = msa1.GetSeqCount();

	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const unsigned uId = msa1.GetSeqId(uSeqIndex);
		unsigned uSeqIndex2;
		const bool bFound = msa2.GetSeqIndex(uId, &uSeqIndex2);

		for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
			{
			// uSeqIndex2 is only read when the lookup succeeded.
			const char c = bFound ? msa2.GetChar(uSeqIndex2, uColIndex) : '-';
			msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
			}
		}
	}
コード例 #4
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
// Copy a contiguous range of columns of msaIn into msaOut.
//
// msaIn:         source alignment (read only).
// uFromColIndex: index of the first column to copy.
// uColCount:     number of columns to copy.
// msaOut:        resized to (rows of msaIn) x uColCount, then filled with the
//                names, ids and characters of the selected columns.
//
// Quit() is called when the range [uFromColIndex, uFromColIndex + uColCount)
// is not fully inside msaIn.
void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,
  MSA &msaOut)
	{
	const unsigned uSeqCount = msaIn.GetSeqCount();
	const unsigned uInColCount = msaIn.GetColCount();

	// The last column read is uFromColIndex + uColCount - 1 and must be a valid
	// index. The test is written without additions or "- 1" so that it cannot
	// wrap around for uColCount == 0 or for very large arguments.
	if (uFromColIndex > uInColCount || uColCount > uInColCount - uFromColIndex)
		Quit("MSAFromColRange, out of bounds");

	msaOut.SetSize(uSeqCount, uColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uSeqIndex);
		const unsigned uId = msaIn.GetSeqId(uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);
		msaOut.SetSeqId(uSeqIndex, uId);

		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uSeqIndex, uFromColIndex + uColIndex);
			msaOut.SetChar(uSeqIndex, uColIndex, c);
			}
		}
	}
コード例 #5
0
ファイル: maketree.cpp プロジェクト: Wyss/mauve-py
void DoMakeTree()
	{
	if (g_pstrInFileName.get() == 0 || g_pstrOutFileName.get() == 0)
		Quit("-maketree requires -in <msa> and -out <treefile>");

	SetStartTime();

	SetSeqWeightMethod(g_SeqWeight1.get());

	TextFile MSAFile(g_pstrInFileName.get());

	MSA msa;
	msa.FromFile(MSAFile);

	unsigned uSeqCount = msa.GetSeqCount();
	MSA::SetIdCount(uSeqCount);

// Initialize sequence ids.
// From this point on, ids must somehow propogate from here.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		msa.SetSeqId(uSeqIndex, uSeqIndex);
	SetMuscleInputMSA(msa);

	Progress("%u sequences", uSeqCount);

	Tree tree;
	TreeFromMSA(msa, tree, g_Cluster2.get(), g_Distance2.get(), g_Root2.get());

	TextFile TreeFile(g_pstrOutFileName.get(), true);
	tree.ToFile(TreeFile);

	Progress("Tree created");
	}
コード例 #6
0
ファイル: mhack.cpp プロジェクト: Unode/ext_apps
// For every sequence whose id is flagged in the global array M, overwrite
// the first non-gap character of its row with 'M'. Afterwards the array M
// is freed and reset to 0.
// Does nothing unless the alphabet is amino acid and M has been allocated.
void MHackEnd(MSA &msa)
	{
	if (ALPHA_Amino != g_Alpha || 0 == M)
		return;

	const unsigned uRows = msa.GetSeqCount();
	const unsigned uCols = msa.GetColCount();
	for (unsigned uRow = 0; uRow < uRows; ++uRow)
		{
		const unsigned uId = msa.GetSeqId(uRow);
		if (!M[uId])
			continue;

		// Skip leading gaps; stop at the first letter, if any.
		unsigned uCol = 0;
		while (uCol < uCols && msa.IsGap(uRow, uCol))
			++uCol;
		if (uCol < uCols)
			msa.SetChar(uRow, uCol, 'M');
		}

	delete[] M;
	M = 0;
	}
コード例 #7
0
ファイル: dosp.cpp プロジェクト: ggrekhov/ugene
void DoSP()
	{
    MuscleContext *ctx = getMuscleContext();

	TextFile f(ctx->params.g_pstrSPFileName);

	MSA a;
	a.FromFile(f);

	ALPHA Alpha = ALPHA_Undefined;
	switch (ctx->params.g_SeqType)
		{
	case SEQTYPE_Auto:
		Alpha = a.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid SeqType");
		}
	SetAlpha(Alpha);
	a.FixAlpha();

	SetPPScore();

	const unsigned uSeqCount = a.GetSeqCount();
	if (0 == uSeqCount)
		Quit("No sequences in input file %s", ctx->params.g_pstrSPFileName);

	MSA::SetIdCount(uSeqCount);
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		a.SetSeqId(uSeqIndex, uSeqIndex);

	SetSeqWeightMethod(ctx->params.g_SeqWeight1);
	Tree tree;
	TreeFromMSA(a, tree, ctx->params.g_Cluster2, ctx->params.g_Distance2, ctx->params.g_Root2);
	SetMuscleTree(tree);
	SetMSAWeightsMuscle((MSA &) a);

	SCORE SP = ObjScoreSP(a);

	Log("File=%s;SP=%.4g\n", ctx->params.g_pstrSPFileName, SP);
	fprintf(stderr, "File=%s;SP=%.4g\n", ctx->params.g_pstrSPFileName, SP);
	}
コード例 #8
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
// Make an alignment ready for scoring: give every sequence its row index
// as id, build a tree from the alignment, install it as the current
// tree and set the sequence weights of msa.
void PrepareMSAforScoring( MSA& msa )
{
	Tree GuideTree;

	const unsigned uRows = msa.GetSeqCount();
	unsigned uRow = 0;
	while (uRow < uRows)
		{
		msa.SetSeqId(uRow, uRow);
		++uRow;
		}

	TreeFromMSA(msa, GuideTree, g_Cluster2.get(), g_Distance2.get(), g_Root1.get());
	SetMuscleTree(GuideTree);
	SetMSAWeightsMuscle(msa);
}
コード例 #9
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// Objective score defined as the sum of profile-sequence
// scores over all sequences of the alignment. The profile is
// built once from the whole alignment, so each sequence is
// also scored against itself; this avoids rebuilding the
// profile per sequence at the price of a questionable
// approximation. The aim is to find out whether the apparent
// advantage of the log-expectation score over plain
// sum-of-pairs can be exploited by optimizing this score
// during iterative refinement.
//
// MatchScore: optional (may be 0). When given, it receives for each
// column the weighted sum of the letter scores of that column.
// Returns the weighted sum of the per-sequence scores.
SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])
	{
	if (g_PPScore != PPSCORE_LE)
		Quit("FastScoreMSA_LASimple: LA");

	const unsigned uRows = msa.GetSeqCount();
	const unsigned uCols = msa.GetColCount();

	const ProfPos *Prof = ProfileFromMSA(msa);

	const bool bPerColumn = (0 != MatchScore);
	if (bPerColumn)
		for (unsigned uCol = 0; uCol < uCols; ++uCol)
			MatchScore[uCol] = 0;

	SCORE scoreSum = 0;
	for (unsigned uRow = 0; uRow < uRows; ++uRow)
		{
		const WEIGHT wRow = msa.GetSeqWeight(uRow);
		SCORE scoreRow = 0;
		for (unsigned uCol = 0; uCol < uCols; ++uCol)
			{
			const ProfPos &PP = Prof[uCol];

			if (msa.IsGap(uRow, uCol))
				{
				// A gap opens at the first column or after a non-gap, and
				// closes at the last column or before a non-gap. Interior
				// gap positions add nothing (no gap-extend term is applied).
				const bool bOpen = (0 == uCol) || !msa.IsGap(uRow, uCol - 1);
				const bool bClose = (uCols - 1 == uCol) || !msa.IsGap(uRow, uCol + 1);

				if (bOpen)
					scoreRow += PP.m_scoreGapOpen;
				if (bClose)
					scoreRow += PP.m_scoreGapClose;
				continue;
				}

			// Wildcards contribute no score.
			if (msa.IsWildcard(uRow, uCol))
				continue;

			const unsigned uLetter = msa.GetLetter(uRow, uCol);
			const SCORE scoreMatch = PP.m_AAScores[uLetter];
			if (bPerColumn)
				MatchScore[uCol] += wRow*scoreMatch;
			scoreRow += scoreMatch;
			}
		scoreSum += wRow*scoreRow;
		}

	delete[] Prof;
	return scoreSum;
	}
コード例 #10
0
ファイル: refinew.cpp プロジェクト: Wyss/mauve-py
static void SeqVectFromMSACols(const MSA &msa, unsigned uColFrom, unsigned uColTo,
  SeqVect &v)
	{
	v.Clear();
	const unsigned uSeqCount = msa.GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq s;
		SeqFromMSACols(msa, uSeqIndex, uColFrom, uColTo, s);
		v.AppendSeq(s);
		}
	}
コード例 #11
0
ファイル: refinetreee.cpp プロジェクト: ggrekhov/ugene
// Build a new tree from the current alignment and compare it with the
// existing tree. When the root is reported as changed, the alignment is
// redone via RealignDiffsE() and, unless the run was canceled, both tree
// and msa are replaced by the new versions.
// Throws MuscleException("Canceled") when the context reports cancellation.
// Alignments with fewer than 3 sequences are left untouched.
void RefineTreeE(MSA &msa, const SeqVect &v, Tree &tree, ProgNode *ProgNodes)
	{
	MuscleContext *ctx = getMuscleContext();
	const unsigned uRows = msa.GetSeqCount();
	if (tree.GetLeafCount() != uRows)
		Quit("Refine tree, tree has different number of nodes");

	if (uRows < 3)
		return;

#if	DEBUG
	ValidateMuscleIds(msa);
	ValidateMuscleIds(tree);
#endif

	// Filled by DiffTreesE(): one entry per node, indexed by new-tree node.
	const unsigned uNodes = tree.GetNodeCount();
	unsigned *NewToOld = new unsigned[uNodes];

	Tree treeNew;
	TreeFromMSA(msa, treeNew, ctx->params.g_Cluster2, ctx->params.g_Distance2, ctx->params.g_Root2, ctx->params.g_pstrDistMxFileName2);

#if	DEBUG
	ValidateMuscleIds(treeNew);
#endif

	DiffTreesE(treeNew, tree, NewToOld);

	const unsigned uRootNew = treeNew.GetRootNodeIndex();
	const bool bRootChanged = (NODE_CHANGED == NewToOld[uRootNew]);
	if (bRootChanged)
		{
		MSA msaNew;
		RealignDiffsE(msa, v, treeNew, tree, NewToOld, msaNew, ProgNodes);
		// Only adopt the result when the run was not canceled meanwhile.
		if (!ctx->isCanceled())
			{
			tree.Copy(treeNew);
			msa.Copy(msaNew);
#if	DEBUG
			ValidateMuscleIds(msaNew);
#endif
			}
		}

	// Release the map before a possible throw below.
	delete[] NewToOld;

	if (ctx->isCanceled())
		throw MuscleException("Canceled");

	SetCurrentAlignment(msa);
	ProgressStepsDone();
	}
コード例 #12
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
// Copy the weights stored in the global table g_MuscleWeights (indexed by
// sequence id) into msa, then normalize the weights of msa to 1.0.
// Quit() is called when the table is missing or an id is outside
// [0, g_uMuscleIdCount).
void SetClustalWWeightsMuscle(MSA &msa)
	{
	if (0 == g_MuscleWeights)
		Quit("g_MuscleWeights = 0");

	const unsigned uRows = msa.GetSeqCount();
	unsigned uRow = 0;
	while (uRow < uRows)
		{
		const unsigned uId = msa.GetSeqId(uRow);
		if (!(uId < g_uMuscleIdCount))
			Quit("SetClustalWWeightsMuscle: id out of range");
		const WEIGHT wRow = g_MuscleWeights[uId];
		msa.SetSeqWeight(uRow, wRow);
		++uRow;
		}
	msa.NormalizeWeights((WEIGHT) 1.0);
	}
コード例 #13
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
// Remove all-gap columns from msa in place: columns that are not gap
// columns are compacted to the left, then the leftover columns at the
// end are deleted.
void StripGapColumns( MSA& msa )
	{
	unsigned uDst = 0;	// next column to write
	for( unsigned uSrc = 0; uSrc < msa.GetColCount(); ++uSrc )
		{
		if( msa.IsGapColumn(uSrc) )
			continue;

		for( unsigned uRow = 0; uRow < msa.GetSeqCount(); ++uRow )
			{
			const char c = msa.GetChar(uRow, uSrc);
			msa.SetChar(uRow, uDst, c);
			}
		++uDst;
		}

	const unsigned uTail = msa.GetColCount() - uDst;
	msa.DeleteColumns(uDst, uTail);
	}
コード例 #14
0
ファイル: msa2.cpp プロジェクト: ggrekhov/ugene
// Append the columns of msa2 to the right of msa1.
// Rows are matched by sequence id; the row of msa2 is looked up with
// GetSeqIndex(id) for every row of msa1.
void MSAAppend(MSA &msa1, const MSA &msa2)
	{
	const unsigned uRows = msa1.GetSeqCount();

	const unsigned uColsLeft = msa1.GetColCount();
	const unsigned uColsRight = msa2.GetColCount();

	for (unsigned uRow1 = 0; uRow1 < uRows; ++uRow1)
		{
		const unsigned uRow2 = msa2.GetSeqIndex(msa1.GetSeqId(uRow1));
		unsigned uCol = 0;
		while (uCol < uColsRight)
			{
			msa1.SetChar(uRow1, uColsLeft + uCol, msa2.GetChar(uRow2, uCol));
			++uCol;
			}
		}
	}
コード例 #15
0
ファイル: stabilize.cpp プロジェクト: bigmuscle/bigmuscle
// Produce a copy of msa whose rows are ordered by sequence id:
// row uId of msaStable holds the name, id and characters of the sequence
// that has id uId in msa.
// msa.GetSeqIndex() is called for every value in [0, sequence count), so
// the ids of msa are presumably exactly that range - confirm with callers.
void Stabilize(const MSA &msa, MSA &msaStable)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();

	msaStable.SetSize(uSeqCount, uColCount);
	for (unsigned uId = 0; uId < uSeqCount; ++uId)
		{
		const unsigned uSeqIndex = msa.GetSeqIndex(uId);
		msaStable.SetSeqName(uId, msa.GetSeqName(uSeqIndex));
		// The id must go to the same output row as the name and the
		// characters (row uId), not to the row the sequence had in the input.
		msaStable.SetSeqId(uId, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msa.GetChar(uSeqIndex, uColIndex);
			msaStable.SetChar(uId, uColIndex, c);
			}
		}
	}
コード例 #16
0
ファイル: MuscleUtils.cpp プロジェクト: ggrekhov/ugene
// Copy every row of msa into res (which must be empty) and set the alphabet
// of res to al. ctx->output_uIds is rebuilt so that entry i holds
// ctx->tmp_uIds[id of row i].
void convertMSA2MAlignment(MSA& msa, const DNAAlphabet* al, MultipleSequenceAlignment& res) {
    assert(res->isEmpty());
    MuscleContext *ctx = getMuscleContext();
    res->setAlphabet(al);
    ctx->output_uIds.clear();

    const int rowCount = msa.GetSeqCount();
    for (int row = 0; row < rowCount; ++row) {
        QString rowName = msa.GetSeqName(row);

        // Collect the characters of this row, gaps included.
        const int colCount = msa.GetColCount();
        QByteArray rowData;
        rowData.reserve(colCount);
        for (int col = 0; col < colCount; ++col) {
            rowData.append(msa.GetChar(row, col));
        }

        ctx->output_uIds.append(ctx->tmp_uIds[msa.GetSeqId(row)]);
        res->addRow(rowName, rowData);
    }
}
コード例 #17
0
ファイル: objscoreda.cpp プロジェクト: Wyss/mauve-py
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
//
// ptrLetters, ptrGaps: outputs, written unconditionally (must not be null);
// they receive the weighted totals of the two components reported by
// ScoreSeqPair() for each pair.
// Returns the weighted total of the pair scores over all pairs (i < j).
SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	SCORE scoreTotal = 0;
#if	TRACE
	msa.LogMe();
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif
	SCORE TotalLetters = 0;
	SCORE TotalGaps = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			SCORE Letters;
			SCORE Gaps;
			const SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2,
			  &Letters, &Gaps);
			// Every term is scaled by the product of both sequence weights.
			scoreTotal += w1*w2*scorePair;
			TotalLetters += w1*w2*Letters;
			TotalGaps += w1*w2*Gaps;
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  %d=%s %d=%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  uSeqIndex1,
			  msa.GetSeqName(uSeqIndex1),
			  uSeqIndex2,
			  msa.GetSeqName(uSeqIndex2));
#endif
			}
		}
	*ptrLetters = TotalLetters;
	*ptrGaps = TotalGaps;
	return scoreTotal;
	}
コード例 #18
0
ファイル: writescorefile.cpp プロジェクト: ggrekhov/ugene
static double GetColScore(const MSA &msa, unsigned uCol)
	{
    MuscleContext *d = getMuscleContext();
    unsigned &g_AlphaSize = d->alpha.g_AlphaSize;
    ALPHA &g_Alpha = d->alpha.g_Alpha;

	const unsigned uSeqCount = msa.GetSeqCount();
	unsigned uPairCount = 0;
	double dSum = 0.0;
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		if (msa.IsGap(uSeq1, uCol))
			continue;
		unsigned uLetter1 = msa.GetLetterEx(uSeq1, uCol);
		if (uLetter1 >= g_AlphaSize)
			continue;
		for (unsigned uSeq2 = uSeq1 + 1; uSeq2 < uSeqCount; ++uSeq2)
			{
			if (msa.IsGap(uSeq2, uCol))
				continue;
			unsigned uLetter2 = msa.GetLetterEx(uSeq2, uCol);
			if (uLetter2 >= g_AlphaSize)
				continue;
			double Score;
			switch (g_Alpha)
				{
			case ALPHA_Amino:
				Score = VTML_SP[uLetter1][uLetter2];
				break;
			case ALPHA_DNA:
			case ALPHA_RNA:
				Score = NUC_SP[uLetter1][uLetter2];
				break;
			default:
				Quit("GetColScore: invalid alpha=%d", g_Alpha);
				}
			dSum += Score;
			++uPairCount;
			}
		}
	if (0 == uPairCount)
		return 0;
	return dSum / uPairCount;
	}
コード例 #19
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void SeqVectFromMSA(const MSA &msa, SeqVect &v)
	{
	v.Clear();
	const unsigned uSeqCount = msa.GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq s;
		msa.GetSeq(uSeqIndex, s);

		s.StripGaps();
		//if (0 == s.Length())
		//	continue;

		const char *ptrName = msa.GetSeqName(uSeqIndex);
		s.SetName(ptrName);

		v.AppendSeq(s);
		}
	}
コード例 #20
0
ファイル: msa2.cpp プロジェクト: ggrekhov/ugene
// Set the sequence weights of msa from CalcThreeWayWeights() on the
// current tree and its two split nodes, then normalize them to 1.0.
// When either split node is NULL_NEIGHBOR, the weights are set with
// SetHenikoffWeightsPB() instead.
// Quit() is called when a sequence id is not below the leaf count of the tree.
void SetThreeWayWeightsMuscle(MSA &msa)
	{
	MuscleContext *ctx = getMuscleContext();
	const Tree* &ptrTree = ctx->msa2.g_ptrMuscleTree;
	unsigned &uSplit1 = ctx->msa2.g_uTreeSplitNode1;
	unsigned &uSplit2 = ctx->msa2.g_uTreeSplitNode2;

	const bool bHaveSplit = (NULL_NEIGHBOR != uSplit1 && NULL_NEIGHBOR != uSplit2);
	if (!bHaveSplit)
		{
		msa.SetHenikoffWeightsPB();
		return;
		}

	// One weight per leaf of the tree, indexed by sequence id.
	const unsigned uLeafCount = ptrTree->GetLeafCount();
	WEIGHT *LeafWeights = new WEIGHT[uLeafCount];

	CalcThreeWayWeights(*ptrTree, uSplit1, uSplit2, LeafWeights);

	const unsigned uRows = msa.GetSeqCount();
	for (unsigned uRow = 0; uRow < uRows; ++uRow)
		{
		const unsigned uId = msa.GetSeqId(uRow);
		if (!(uId < uLeafCount))
			Quit("SetThreeWayWeightsMuscle: id out of range");
		msa.SetSeqWeight(uRow, LeafWeights[uId]);
		}
#if	LOCAL_VERBOSE
	{
	Log("SetThreeWayWeightsMuscle\n");
	for (unsigned n = 0; n < uRows; ++n)
		{
		const unsigned uId = msa.GetSeqId(n);
		Log("%20.20s %6.3f\n", msa.GetSeqName(n), LeafWeights[uId]);
		}
	}
#endif
	msa.NormalizeWeights((WEIGHT) 1.0);

	delete[] LeafWeights;
	}
コード例 #21
0
ファイル: writescorefile.cpp プロジェクト: ggrekhov/ugene
void WriteScoreFile(const MSA &msa)
	{
    MuscleContext *ctx = getMuscleContext();

	FILE *f = fopen(ctx->params.g_pstrScoreFileName, "w");
	if (0 == f)
		Quit("Cannot open score file '%s' errno=%d", ctx->params.g_pstrScoreFileName, errno);

	const unsigned uColCount = msa.GetColCount();
	const unsigned uSeqCount = msa.GetSeqCount();
	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
		{
		double Score = GetColScore(msa, uCol);
		fprintf(f, "%10.3f  ", Score);
		for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
			{
			char c = msa.GetChar(uSeq, uCol);
			fprintf(f, "%c", c);
			}
		fprintf(f, "\n");
		}
	fclose(f);
	}
コード例 #22
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
// "Catenate" two MSAs (by bad analogy with the UNIX cat command).
// msa1 and msa2 are expected to hold the same sequence names, possibly
// in a different order.
// msaCat is the combined alignment produced by appending the
// sequences of msa2 to the sequences of msa1; it takes its row order
// and names from msa1. A name that is missing from msa2 gets '-' in
// all columns contributed by msa2.
void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat)
	{
	const unsigned uRows = msa1.GetSeqCount();

	const unsigned uColsLeft = msa1.GetColCount();
	const unsigned uColsRight = msa2.GetColCount();

	msaCat.SetSize(uRows, uColsLeft + uColsRight);

	for (unsigned uRow = 0; uRow < uRows; ++uRow)
		{
		// Left part: copied straight from msa1.
		for (unsigned uCol = 0; uCol < uColsLeft; ++uCol)
			msaCat.SetChar(uRow, uCol, msa1.GetChar(uRow, uCol));

		const char *ptrName = msa1.GetSeqName(uRow);
		msaCat.SetSeqName(uRow, ptrName);

		// Right part: the row of msa2 with the same name, or gaps.
		unsigned uRow2;
		if (!msa2.GetSeqIndex(ptrName, &uRow2))
			{
			for (unsigned uCol = 0; uCol < uColsRight; ++uCol)
				msaCat.SetChar(uRow, uColsLeft + uCol, '-');
			continue;
			}

		for (unsigned uCol = 0; uCol < uColsRight; ++uCol)
			msaCat.SetChar(uRow, uColsLeft + uCol, msa2.GetChar(uRow2, uCol));
		}
	}
コード例 #23
0
ファイル: refinehorizP.cpp プロジェクト: ggrekhov/ugene
    // Worker loop for refinement.
    // The worker acquires workpool->mainSem, then repeats until
    // workpool->isRefineDone(): fetch a job (an index into
    // workpool->InternalNodeIndexes plus a working alignment msaIn), run
    // TryRealign() for it and for every follow-up job returned by
    // refineGetNextJob(), and finally release childSem and acquire mainSem
    // again before the next round.
    void RefineWorker::_run() {

        unsigned i = 0;
#if TRACE
        algoLog.trace(QString("Worker %1 start. Wait...").arg(QString::number(workerID)));
#endif
        // Wait for the start signal.
        workpool->mainSem.acquire();
#if TRACE
        // NOTE(review): the value labelled "childSem" below is taken from
        // mainSem.available(), not childSem - looks unintended; confirm.
        algoLog.trace(QString("Worker %1: Stop wait. Start (mainSem %2, childSem %3)").arg(QString::number(workerID)).
            arg(QString::number(workpool->mainSem.available())).arg(QString::number(workpool->mainSem.available())));
#endif
        while(!workpool->isRefineDone())
        {
            MSA msaIn;
            
            // First job of this round; NULL_NEIGHBOR means no job.
            i = workpool->refineGetJob(&msaIn, workerID);

            // NOTE(review): ctx is only referenced by the commented-out lines below.
            MuscleContext *ctx = workpool->ctx;
//            unsigned &g_uTreeSplitNode1 = ctx->muscle.g_uTreeSplitNode1;
//            unsigned &g_uTreeSplitNode2 = ctx->muscle.g_uTreeSplitNode2;
//            unsigned &g_uRefineHeightSubtree = ctx->refinehoriz.g_uRefineHeightSubtree;
//            unsigned &g_uRefineHeightSubtreeTotal = ctx->refinehoriz.g_uRefineHeightSubtreeTotal;
            Tree &tree = workpool->GuideTree;
            const unsigned uSeqCount = msaIn.GetSeqCount();
//            const unsigned uInternalNodeCount = uSeqCount - 1;
            // Scratch buffers for the two leaf sets, one slot per sequence of msaIn.
            unsigned *Leaves1 = new unsigned[uSeqCount];
            unsigned *Leaves2 = new unsigned[uSeqCount];
            const unsigned uRootNodeIndex = tree.GetRootNodeIndex();

            while (i != NULL_NEIGHBOR) {

                const unsigned uInternalNodeIndex = workpool->InternalNodeIndexes[i];
                unsigned uNeighborNodeIndex;
                // The root is skipped when bRight is false: the job is handed
                // back as not accepted with score -1 and the next one is fetched.
                if (tree.IsRoot(uInternalNodeIndex) && !workpool->bRight) {
                    i = workpool->refineGetNextJob(&msaIn, false, -1, i, workerID);
                    continue;
                }
                else if (workpool->bRight)
                    uNeighborNodeIndex = tree.GetRight(uInternalNodeIndex);
                else
                    uNeighborNodeIndex = tree.GetLeft(uInternalNodeIndex);

                //            g_uTreeSplitNode1 = uInternalNodeIndex;
                //            g_uTreeSplitNode2 = uNeighborNodeIndex;

                unsigned uCount1;
                unsigned uCount2;

                // Leaves1: leaves collected from the neighbor node.
                // Leaves2: leaves collected from the root, excluding the neighbor node.
                GetLeaves(tree, uNeighborNodeIndex, Leaves1, &uCount1);
                GetLeavesExcluding(tree, uRootNodeIndex, uNeighborNodeIndex,
                    Leaves2, &uCount2);

                SCORE scoreBefore;
                SCORE scoreAfter;
                bool bAccepted = TryRealign(msaIn, tree, Leaves1, uCount1, Leaves2, uCount2,
                    &scoreBefore, &scoreAfter, workpool->bLockLeft, workpool->bLockRight);

                // Report the better of the two scores together with the verdict.
                SCORE scoreMax = scoreAfter > scoreBefore? scoreAfter : scoreBefore;
                //bool bRepeated = workpool->History->SetScore(workpool->uIter, uInternalNodeIndex, workpool->bRight, scoreMax);
                i = workpool->refineGetNextJob(&msaIn, bAccepted, scoreMax, i, workerID);
            }

            delete[] Leaves1;
            delete[] Leaves2;
#if TRACE
            algoLog.trace(QString("Worker %1: no job available. Wait... (mainSem %2, childSem %3)").arg(QString::number(workerID)).
                arg(QString::number(workpool->mainSem.available())).arg(QString::number(workpool->mainSem.available())));
#endif         
            // Signal that this round is finished, then wait for the next start signal.
            workpool->childSem.release();
            workpool->mainSem.acquire();
#if TRACE      
            algoLog.trace(QString("Worker %1: Stop wait. Start (mainSem %2, childSem %3)").arg(QString::number(workerID)).
                arg(QString::number(workpool->mainSem.available())).arg(QString::number(workpool->mainSem.available())));
#endif         
        }
#if TRACE
        algoLog.trace(QString("Worker %1: Refine done. Exit").arg(QString::number(workerID)));
#endif
    }
コード例 #24
0
ファイル: refine.cpp プロジェクト: Wyss/mauve-py
void Refine()
	{
	SetOutputFileName(g_pstrOutFileName.get());
	SetInputFileName(g_pstrInFileName.get());
	SetStartTime();

	SetMaxIters(g_uMaxIters.get());
	SetSeqWeightMethod(g_SeqWeight1.get());

	TextFile fileIn(g_pstrInFileName.get());
	MSA msa;
	msa.FromFile(fileIn);

	const unsigned uSeqCount = msa.GetSeqCount();
	if (0 == uSeqCount)
		Quit("No sequences in input file");

	ALPHA Alpha = ALPHA_Undefined;
	switch (g_SeqType.get())
		{
	case SEQTYPE_Auto:
		Alpha = msa.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid SeqType");
		}
	SetAlpha(Alpha);
	msa.FixAlpha();

	SetPPScore();
	if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
		SetPPScore(PPSCORE_SPN);

	MSA::SetIdCount(uSeqCount);

// Initialize sequence ids.
// From this point on, ids must somehow propogate from here.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		msa.SetSeqId(uSeqIndex, uSeqIndex);
	SetMuscleInputMSA(msa);

	Tree GuideTree;
	TreeFromMSA(msa, GuideTree, g_Cluster2.get(), g_Distance2.get(), g_Root2.get());
	SetMuscleTree(GuideTree);

	if (g_bAnchors.get())
		RefineVert(msa, GuideTree, g_uMaxIters.get());
	else
		RefineHoriz(msa, GuideTree, g_uMaxIters.get(), false, false);

	ValidateMuscleIds(msa);
	ValidateMuscleIds(GuideTree);

//	TextFile fileOut(g_pstrOutFileName.get(), true);
//	msa.ToFile(fileOut);
	MuscleOutput(msa);
	}
コード例 #25
0
ファイル: refinew.cpp プロジェクト: Wyss/mauve-py
void RefineW(const MSA &msaIn, MSA &msaOut)
	{
	const unsigned uSeqCount = msaIn.GetSeqCount();
	const unsigned uColCount = msaIn.GetColCount();

// Reserve same nr seqs, 20% more cols
	const unsigned uReserveColCount = (uColCount*120)/100;
	msaOut.SetSize(uSeqCount, uReserveColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));
		msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));
		}

	const unsigned uWindowCount = (uColCount + g_uRefineWindow.get() - 1)/g_uRefineWindow.get();
	if (0 == g_uWindowTo.get())
		g_uWindowTo.get() = uWindowCount - 1;

#if	MEMDEBUG
	_CrtSetBreakAlloc(1560);
#endif

	if (g_uWindowOffset.get() > 0)
		{
		MSA msaTmp;
		MSAFromColRange(msaIn, 0, g_uWindowOffset.get(), msaOut);
		}

	if (!g_bQuiet.get())
		fprintf(stderr, "\n");
	for (unsigned uWindowIndex = g_uWindowFrom.get(); uWindowIndex <= g_uWindowTo.get(); ++uWindowIndex)
		{
		if (!g_bQuiet.get())
			fprintf(stderr, "Window %d of %d    \r", uWindowIndex, uWindowCount);
		const unsigned uColFrom = g_uWindowOffset.get() + uWindowIndex*g_uRefineWindow.get();
		unsigned uColTo = uColFrom + g_uRefineWindow.get() - 1;
		if (uColTo >= uColCount)
			uColTo = uColCount - 1;
		assert(uColTo >= uColFrom);

		SeqVect v;
		SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);

#if	MEMDEBUG
		_CrtMemState s1;
		_CrtMemCheckpoint(&s1);
#endif
		// Begin AED 5/20/06
		// remove any empty seqs in this window
		std::vector< size_t > empty_seqs;
		SeqVect vr;
		for( size_t seqI = 0; seqI < v.size(); ++seqI )
		{
			if( v[seqI]->size() == 0 )
				empty_seqs.push_back(seqI);
			else
				vr.push_back(v[seqI]);
		}
		std::vector< unsigned > seqid_map( vr.size() );
		for( size_t seqI = 0; seqI < vr.size(); ++seqI )
		{
			seqid_map[seqI] = vr[seqI]->GetId();
			vr[seqI]->SetId(seqI);
		}

		MSA msaTmp;
		if( vr.size() > 1 )
			MUSCLE(vr, msaTmp);

		// remap the seqids to their original state
		for( size_t seqI = 0; seqI < vr.size(); ++seqI )
			vr[seqI]->SetId(seqid_map[seqI]);

		// merge empty seqs back in
		{
			const unsigned uSeqCount = msaOut.GetSeqCount();

			const unsigned uColCount1 = msaOut.GetColCount();
			const unsigned uColCount2 = vr.size() > 1 ? msaTmp.GetColCount() : vr[0]->size();
			const unsigned uColCountCat = uColCount1 + uColCount2;
			for( unsigned seqI = 0; seqI < vr.size(); ++seqI )
			{
				unsigned uSeqIndex = msaOut.GetSeqIndex(seqid_map[seqI]);
				if( vr.size() > 1 )
				{
					unsigned uSeqIndex2 = msaTmp.GetSeqIndex(seqI);
					for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
					{
						const char c = msaTmp.GetChar(uSeqIndex2, uColIndex);
						msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
					}
				}else{
					for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
					{
						const char c = vr[0]->GetChar(uColIndex);
						msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
					}
				}
			}
			for( unsigned seqI = 0; seqI < empty_seqs.size(); ++seqI )
			{
				unsigned uSeqId2 = v[empty_seqs[seqI]]->GetId();
				unsigned uSeqIndex = msaOut.GetSeqIndex(uSeqId2);
				for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				{
					msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');
				}
			}
			vr.clear();
		}
//		AppendMSA(msaOut, msaTmp);
		// end AED 5/20/06

		if (uWindowIndex == g_uSaveWindow.get())
			{
			MSA msaInTmp;
			unsigned uOutCols = msaOut.GetColCount();
			unsigned un = uColTo - uColFrom + 1;
			MSAFromColRange(msaIn, uColFrom, un, msaInTmp);

			char fn[256];
			sprintf(fn, "win%d_inaln.tmp", uWindowIndex);
			TextFile fIn(fn, true);
			msaInTmp.ToFile(fIn);

			sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);
			TextFile fv(fn, true);
			v.ToFile(fv);

			sprintf(fn, "win%d_outaln.tmp", uWindowIndex);
			TextFile fOut(fn, true);
			msaTmp.ToFile(fOut);
			}

#if	MEMDEBUG
		void FreeDPMemSPN();
		FreeDPMemSPN();

		_CrtMemState s2;
		_CrtMemCheckpoint(&s2);

		_CrtMemState s;
		_CrtMemDifference(&s, &s1, &s2);

		_CrtMemDumpStatistics(&s);
		_CrtMemDumpAllObjectsSince(&s1);
		exit(1);
#endif
//#if	DEBUG
//		AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);
//#endif
		}
	if (!g_bQuiet.get())
		fprintf(stderr, "\n");

//	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!
	}
コード例 #26
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
//
// MatchScore: optional (may be 0). When given, one entry per column is
// reset to 0 here; nothing else is written to it in this function.
// Side effects: the globals g_SPScoreLetters and g_SPScoreGaps are reset and
// then accumulate the weighted letter and gap components.
// Returns the weighted total over all pairs (i < j); the total is not
// divided by the number of pairs.
SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])
	{
#if	TRACE
	Log("==================ObjScoreSP==============\n");
	Log("msa=\n");
	msa.LogMe();
#endif
	g_SPScoreLetters = 0;
	g_SPScoreGaps = 0;

	if (0 != MatchScore)
		{
		const unsigned uColCount = msa.GetColCount();
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			MatchScore[uColIndex] = 0;
		}

	const unsigned uSeqCount = msa.GetSeqCount();
	SCORE scoreTotal = 0;
#if	TRACE
	Log("Seq1  Seq2     wt1     wt2    Letters         Gaps  Unwt.Score    Wt.Score       Total\n");
	Log("----  ----  ------  ------  ----------  ----------  ----------  ----------  ----------\n");
#endif
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			// Pair weight: product of the two sequence weights.
			const WEIGHT w = w1*w2;

			const SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);
			const SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);
			const SCORE scorePair = scoreLetters + scoreGaps;

			scoreTotal += w*scorePair;

			g_SPScoreLetters += w*scoreLetters;
			g_SPScoreGaps += w*scoreGaps;
#if	TRACE
			Log("%4d  %4d  %6.3f  %6.3f  %10.2f  %10.2f  %10.2f  %10.2f  %10.2f >%s >%s\n",
			  uSeqIndex1,
			  uSeqIndex2,
			  w1,
			  w2,
			  scoreLetters,
			  scoreGaps,
			  scorePair,
			  scorePair*w1*w2,
			  scoreTotal,
			  msa.GetSeqName(uSeqIndex1),
			  msa.GetSeqName(uSeqIndex2));
#endif
			}
		}
#if	TEST_SPFAST
	{
	// Cross-check against the fast implementation.
	SCORE f = ObjScoreSPFast(msa);
	Log("Fast  = %.6g\n", f);
	Log("Brute = %.6g\n", scoreTotal);
	if (BTEq(f, scoreTotal))
		Log("Agree\n");
	else
		Log("** DISAGREE **\n");
	}
#endif
	return scoreTotal;
	}
コード例 #27
0
ファイル: diffobjscore.cpp プロジェクト: ggrekhov/ugene
// Weighted sum-of-pairs letter score of a single alignment column.
//
// Rows whose letter code is >= the alphabet size are skipped
// (presumably gaps / wildcards -- confirm against GetLetterEx).
// Instead of visiting every pair of rows, the score is derived from
// per-letter weight totals f[a]:
//
//     ( sum_a f[a]^2 M[a][a]
//       + 2 * sum_{a<b} f[a] f[b] M[a][b]
//       - sum_i w_i^2 M[l_i][l_i] ) / 2
//
// The last term removes each row's pairing with itself. When
// BRUTE_LETTERS is enabled the result is asserted to match the
// explicit pairwise sum.
//
// Returns 0 when the sequence weights sum to zero or less.
//
// NOTE(review): Freqs has a fixed size of 20 but is indexed by any
// letter below the runtime alphabet size -- this assumes the
// alphabet never exceeds 20 letters; confirm.
static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex)
	{
	MuscleContext *ctx = getMuscleContext();
	SCOREMATRIX &SubstMx = *ctx->params.g_ptrScoreMatrix;
	unsigned &uAlphaSize = ctx->alpha.g_AlphaSize;

	const unsigned uRows = msa.GetSeqCount();

#if	BRUTE_LETTERS
	// Reference value: explicit sum over all row pairs.
	SCORE BruteScore = 0;
	for (unsigned uRowA = 0; uRowA < uRows; ++uRowA)
		{
		unsigned uLetterA = msa.GetLetterEx(uRowA, uColIndex);
		if (uLetterA >= uAlphaSize)
			continue;
		WEIGHT wA = msa.GetSeqWeight(uRowA);
		for (unsigned uRowB = uRowA + 1; uRowB < uRows; ++uRowB)
			{
			unsigned uLetterB = msa.GetLetterEx(uRowB, uColIndex);
			if (uLetterB >= uAlphaSize)
				continue;
			WEIGHT wB = msa.GetSeqWeight(uRowB);
			BruteScore += wA*wB*SubstMx[uLetterA][uLetterB];
			}
		}
#endif

	// Nothing to score if the total weight is not positive.
	double dTotalWeight = 0;
	for (unsigned uRow = 0; uRow < uRows; ++uRow)
		{
		const WEIGHT wRow = msa.GetSeqWeight(uRow);
		dTotalWeight += wRow;
		}
	if (dTotalWeight <= 0)
		return 0;

	// Accumulate per-letter weight totals and, at the same time,
	// subtract each row's self-pair contribution.
	FCOUNT Freqs[20];
	memset(Freqs, 0, sizeof(Freqs));
	SCORE Score = 0;
	for (unsigned uRow = 0; uRow < uRows; ++uRow)
		{
		const unsigned uLetter = msa.GetLetterEx(uRow, uColIndex);
		if (uLetter >= uAlphaSize)
			continue;
		const WEIGHT wRow = msa.GetSeqWeight(uRow);
		Freqs[uLetter] += wRow;
		Score -= wRow*wRow*SubstMx[uLetter][uLetter];
		}

	// Diagonal terms once, off-diagonal terms doubled.
	for (unsigned uA = 0; uA < uAlphaSize; ++uA)
		{
		const FCOUNT fA = Freqs[uA];
		Score += fA*fA*SubstMx[uA][uA];
		for (unsigned uB = uA + 1; uB < uAlphaSize; ++uB)
			{
			const FCOUNT fB = Freqs[uB];
			Score += 2*fA*fB*SubstMx[uA][uB];
			}
		}
	Score /= 2;
#if	BRUTE_LETTERS
	assert(BTEq(BruteScore, Score));
#endif
	return Score;
	}
コード例 #28
0
ファイル: refinesubfams.cpp プロジェクト: ggrekhov/ugene
// Refine an alignment one subfamily at a time.
//
// The guide tree is cut into subfamilies by GetSubfams (using the
// height limit and maximum count fixed below). Each subfamily is
// extracted from msa, stripped of all-gap columns, given a freshly
// estimated tree and refined with RefineVert (when anchors are
// enabled) or RefineHoriz. If any subfamily reports a change, the
// subfamily alignments are merged back into msa by
// ProgressiveAlignSubfams.
//
//   msa     alignment to refine; overwritten only if something changed.
//   tree    guide tree whose leaves correspond to the sequences in msa.
//   uIters  iteration count forwarded to RefineVert / RefineHoriz.
//
// Returns true if any subfamily refinement reported a change.
// Returns false immediately for fewer than 3 sequences.
bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)
	{
	MuscleContext *ctx = getMuscleContext();
	CLUSTER &g_Cluster2 = ctx->params.g_Cluster2;
	DISTANCE &g_Distance2 = ctx->params.g_Distance2;
	ROOT &g_Root2 = ctx->params.g_Root2;
	bool &g_bAnchors = ctx->params.g_bAnchors;

	const unsigned uSeqCount = msa.GetSeqCount();
	if (uSeqCount < 3)
		return false;

	const double dMaxHeight = 0.6;
	const unsigned uMaxSubfamCount = 16;
	//const unsigned uNodeCount = tree.GetNodeCount();

	// GetSubfams hands back an array that is released at the end of
	// this function.
	unsigned *Subfams;
	unsigned uSubfamCount;
	GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);
	assert(uSubfamCount <= uSeqCount);

	if (ctx->params.g_bVerbose)
		LogSubfams(tree, Subfams, uSubfamCount);

	// Leaves and Ids are scratch buffers reused for every subfamily;
	// uSeqCount entries is enough for any subfamily (see assert below).
	MSA *SubfamMSAs = new MSA[uSubfamCount];
	unsigned *Leaves = new unsigned[uSeqCount];
	unsigned *Ids = new unsigned[uSeqCount];

	bool bAnyChanges = false;
	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uSubfam = Subfams[uSubfamIndex];
		unsigned uLeafCount;
		GetLeaves(tree, uSubfam, Leaves, &uLeafCount);
		assert(uLeafCount <= uSeqCount);

		LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);

		MSA &msaSubfam = SubfamMSAs[uSubfamIndex];
		MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);
		DeleteGappedCols(msaSubfam);

#if	TRACE
		Log("Subfam %u MSA=\n", uSubfamIndex);
		msaSubfam.LogMe();
#endif

	// Subfamilies of one or two sequences are left as extracted.
		if (msaSubfam.GetSeqCount() <= 2)
			continue;

	// TODO /////////////////////////////////////////
	// Try using existing tree, may actually hurt to
	// re-estimate, may also be a waste of CPU & mem.
	/////////////////////////////////////////////////
		Tree SubfamTree;
		TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);

		bool bAnyChangesThisSubfam;
		if (g_bAnchors)
			bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);
		else
			bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);
#if	TRACE
		Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);
#endif
		if (bAnyChangesThisSubfam)
			bAnyChanges = true;
		}

	// Only rebuild the full alignment when a subfamily actually changed.
	if (bAnyChanges)
		ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);

	delete[] Leaves;
	delete[] Ids;	// previously leaked on every call
	delete[] Subfams;
	delete[] SubfamMSAs;

	return bAnyChanges;
	}
コード例 #29
0
ファイル: refinehorizP.cpp プロジェクト: ggrekhov/ugene
    // Split msaIn into two groups of sequences, re-align the groups
    // against each other, and keep the result only if it scores better.
    //
    //   msaIn            alignment to improve; replaced by the re-aligned
    //                    version when the change is accepted.
    //   tree             tree used to map leaf indexes to sequence ids.
    //   Leaves1/uCount1  leaf indexes (and count) of the first group.
    //   Leaves2/uCount2  leaf indexes (and count) of the second group.
    //   ptrscoreBefore,
    //   ptrscoreAfter    outputs, always written:
    //                      - 0 and 0 when a group has no columns left or
    //                        the pairwise path did not change;
    //                      - 0 and the score difference when DIFFOBJSCORE
    //                        is enabled;
    //                      - the objective scores before and after
    //                        otherwise.
    //   bLockLeft,
    //   bLockRight       forwarded unchanged to AlignTwoMSAs.
    //
    // Returns true if the re-alignment was accepted (msaIn updated).
    //
    // Note: once the path is known to have changed, SetMSAWeightsMuscle
    // is applied to msaIn regardless of whether the change is accepted.
    bool TryRealign(MSA &msaIn, const Tree &tree, const unsigned Leaves1[],
        unsigned uCount1, const unsigned Leaves2[], unsigned uCount2,
        SCORE *ptrscoreBefore, SCORE *ptrscoreAfter,
        bool bLockLeft, bool bLockRight)
    {
#if	TRACE
        Log("TryRealign, msaIn=\n");
#endif

        MuscleContext *ctx = getMuscleContext();

        const unsigned uSeqCount = msaIn.GetSeqCount();

        unsigned *Ids1 = new unsigned[uSeqCount];
        unsigned *Ids2 = new unsigned[uSeqCount];

        LeafIndexesToIds(tree, Leaves1, uCount1, Ids1);
        LeafIndexesToIds(tree, Leaves2, uCount2, Ids2);

        MSA msa1;
        MSA msa2;

        MSASubsetByIds(msaIn, Ids1, uCount1, msa1);
        MSASubsetByIds(msaIn, Ids2, uCount2, msa2);

#if	DEBUG
        ValidateMuscleIds(msa1);
        ValidateMuscleIds(msa2);
#endif

        // Computing the objective score may be expensive for
        // large numbers of sequences. As a speed optimization,
        // we check whether the alignment changes. If it does
        // not change, there is no need to compute the objective
        // score. We test for the alignment changing by comparing
        // the Viterbi paths before and after re-aligning.
        PWPath pathBefore;
        pathBefore.FromMSAPair(msa1, msa2);

        DeleteGappedCols(msa1);
        DeleteGappedCols(msa2);

        // Nothing to align if either group has no columns left. Report
        // zero scores, exactly like the "no change" return below, so the
        // caller never reads unset outputs.
        if (0 == msa1.GetColCount() || 0 == msa2.GetColCount()) {
            *ptrscoreBefore = 0;
            *ptrscoreAfter = 0;
            delete[] Ids1;
            delete[] Ids2;
            return false;
        }

        MSA msaRealigned;
        PWPath pathAfter;

        AlignTwoMSAs(msa1, msa2, msaRealigned, pathAfter, bLockLeft, bLockRight);

        bool bAnyChanges = !pathAfter.Equal(pathBefore);
        unsigned uDiffCount1;
        unsigned uDiffCount2;
        // Edge buffers live in the per-context refinehoriz state.
        unsigned* Edges1 = ctx->refinehoriz.Edges1;
        unsigned* Edges2 = ctx->refinehoriz.Edges2;
        DiffPaths(pathBefore, pathAfter, Edges1, &uDiffCount1, Edges2, &uDiffCount2);

#if	TRACE
        Log("TryRealign, msa1=\n");
        Log("\nmsa2=\n");
        Log("\nRealigned (changes %s)=\n", bAnyChanges ? "TRUE" : "FALSE");
#endif

        if (!bAnyChanges)
        {
            *ptrscoreBefore = 0;
            *ptrscoreAfter = 0;
            delete[] Ids1;
            delete[] Ids2;
            return false;
        }

        SetMSAWeightsMuscle(msaIn);
        SetMSAWeightsMuscle(msaRealigned);

#if	DIFFOBJSCORE
        // Score only the differing edges; "after" carries the difference.
        const SCORE scoreDiff = DiffObjScore(msaIn, pathBefore, Edges1, uDiffCount1,
            msaRealigned, pathAfter, Edges2, uDiffCount2);
        bool bAccept = (scoreDiff > 0);
        *ptrscoreBefore = 0;
        *ptrscoreAfter = scoreDiff;
        //const SCORE scoreBefore = ObjScoreIds(msaIn, Ids1, uCount1, Ids2, uCount2);
        //const SCORE scoreAfter = ObjScoreIds(msaRealigned, Ids1, uCount1, Ids2, uCount2);
        //Log("Diff = %.3g %.3g\n", scoreDiff, scoreAfter - scoreBefore);
#else
        const SCORE scoreBefore = ObjScoreIds(msaIn, Ids1, uCount1, Ids2, uCount2);
        const SCORE scoreAfter = ObjScoreIds(msaRealigned, Ids1, uCount1, Ids2, uCount2);

        // Accept strict improvements only.
        bool bAccept = (scoreAfter > scoreBefore);

#if	TRACE
        Log("Score %g -> %g Accept %s\n", scoreBefore, scoreAfter, bAccept ? "TRUE" : "FALSE");
#endif

        *ptrscoreBefore = scoreBefore;
        *ptrscoreAfter = scoreAfter;
#endif

        if (bAccept)
            msaIn.Copy(msaRealigned);
        delete[] Ids1;
        delete[] Ids2;
        return bAccept;
    }
コード例 #30
0
ファイル: objscore.cpp プロジェクト: bigmuscle/bigmuscle
// Objective score of an alignment under the scoring method selected
// by g_ObjScore.
//
//   msa                      the full alignment.
//   SeqIndexes1/uSeqCount1,
//   SeqIndexes2/uSeqCount2   two sets of sequence indexes into msa.
//                            They are used only by the DP and XP
//                            methods, which score the two subsets
//                            against each other; the SP, SPF and PS
//                            methods score msa as a whole.
//
// OBJSCORE_SPM is resolved here to XP for up to 100 sequences and to
// SPF above that. Any method not handled below terminates via Quit.
//
// When TIMING is enabled the elapsed clock ticks are added to
// g_ticksObjScore.
SCORE ObjScore(const MSA &msa, const unsigned SeqIndexes1[],
  unsigned uSeqCount1, const unsigned SeqIndexes2[], unsigned uSeqCount2)
	{
#if	TIMING
	TICKS ticksStart = GetClockTicks();
#endif
	const unsigned uSeqCount = msa.GetSeqCount();

	OBJSCORE Method = g_ObjScore;
	if (OBJSCORE_SPM == Method)
		Method = (uSeqCount <= 100) ? OBJSCORE_XP : OBJSCORE_SPF;

	MSA msaGroup1;
	MSA msaGroup2;
	SCORE Score = 0;

	switch (Method)
		{
	case OBJSCORE_DP:
	case OBJSCORE_XP:
	// Two-profile methods: extract and weight each subset on its own.
		MSAFromSeqSubset(msa, SeqIndexes1, uSeqCount1, msaGroup1);
		MSAFromSeqSubset(msa, SeqIndexes2, uSeqCount2, msaGroup2);

		SetMSAWeightsMuscle(msaGroup1);
		SetMSAWeightsMuscle(msaGroup2);

		if (OBJSCORE_DP == Method)
			Score = ObjScoreDP(msaGroup1, msaGroup2);
		else
			Score = ObjScoreXP(msaGroup1, msaGroup2);
		break;

	// Whole-alignment methods. SetMSAWeightsMuscle takes a non-const
	// MSA, so constness is cast away (design flaw): the caller's msa
	// is modified by the call.
	case OBJSCORE_SP:
		SetMSAWeightsMuscle(const_cast<MSA &>(msa));
		Score = ObjScoreSP(msa);
		break;

	case OBJSCORE_SPF:
		SetMSAWeightsMuscle(const_cast<MSA &>(msa));
		Score = ObjScoreSPDimer(msa);
		break;

	case OBJSCORE_PS:
		SetMSAWeightsMuscle(const_cast<MSA &>(msa));
		Score = ObjScorePS(msa);
		break;

	default:
		Quit("Invalid g_ObjScore=%d", g_ObjScore);
		}
#if	TIMING
	TICKS ticksEnd = GetClockTicks();
	g_ticksObjScore += (ticksEnd - ticksStart);
#endif
	return Score;
	}