コード例 #1
0
ファイル: refinew.cpp プロジェクト: Wyss/mauve-py
// Append msa2 at the end of msa1
void AppendMSA(MSA &msa1, const MSA &msa2)
	{
	const unsigned uSeqCount = msa1.GetSeqCount();

	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();

	const unsigned uColCountCat = uColCount1 + uColCount2;

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uId = msa1.GetSeqId(uSeqIndex);
		unsigned uSeqIndex2;
		bool bFound = msa2.GetSeqIndex(uId, &uSeqIndex2);
		if (bFound)
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				{
				const char c = msa2.GetChar(uSeqIndex2, uColIndex);
				msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
				}
			}
		else
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');
			}
		}
	}
コード例 #2
0
ファイル: aligntwomsas.cpp プロジェクト: Unode/ext_apps
SCORE AlignTwoMSAs(const MSA &msa1, const MSA &msa2, MSA &msaOut, PWPath &Path,
  bool bLockLeft, bool bLockRight)
	{
	const unsigned uLengthA = msa1.GetColCount();
	const unsigned uLengthB = msa2.GetColCount();

	ProfPos *PA = ProfileFromMSA(msa1);
	ProfPos *PB = ProfileFromMSA(msa2);

	if (bLockLeft)
		{
		PA[0].m_scoreGapOpen = MINUS_INFINITY;
		PB[0].m_scoreGapOpen = MINUS_INFINITY;
		}

	if (bLockRight)
		{
		PA[uLengthA-1].m_scoreGapClose = MINUS_INFINITY;
		PB[uLengthB-1].m_scoreGapClose = MINUS_INFINITY;
		}

	float r = (float) uLengthA/ (float) (uLengthB + 1); // +1 to prevent div 0
	if (r < 1)
		r = 1/r;

	SCORE Score = GlobalAlign(PA, uLengthA, PB, uLengthB, Path);

	AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);

	delete[] PA;
	delete[] PB;

	return Score;
	}
コード例 #3
0
ファイル: profile.cpp プロジェクト: Wyss/mauve-py
void ProfileProfile(MSA &msa1, MSA &msa2, MSA &msaOut)
	{

	unsigned uLength1;
	unsigned uLength2;

	uLength1 = msa1.GetColCount();
	uLength2 = msa2.GetColCount();

	Tree tree1;
	Tree tree2;
	ProfPos *Prof1 = ProfileFromMSALocal(msa1, tree1);
	ProfPos *Prof2 = ProfileFromMSALocal(msa2, tree2);

	PWPath Path;
	ProfPos *ProfOut;
	unsigned uLengthOut;
	Progress("Aligning profiles");
	AlignTwoProfs(Prof1, uLength1, 1.0, Prof2, uLength2, 1.0, Path, &ProfOut, &uLengthOut);

	Progress("Building output");
	AlignTwoMSAsGivenPath(Path, msa1, msa2, msaOut);
	delete[] Prof1;
	delete[] Prof2;
	delete[] ProfOut;
	}
コード例 #4
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
void FindAnchorColsPP(const MSA &msa1, const MSA &msa2, unsigned AnchorCols[],
  unsigned *ptruAnchorColCount)
	{
	const unsigned uColCount = msa1.GetColCount();
	if( uColCount != msa2.GetColCount() )
		{
		*ptruAnchorColCount = 0;
		return;	// the profiles must have equal length to find anchor cols
		}

	SCORE *MatchScore = new SCORE[uColCount];
	SCORE *SmoothScore = new SCORE[uColCount];
	unsigned *BestCols = new unsigned[uColCount];

	LetterObjScoreXP(msa1, msa2, MatchScore);
	g_uSmoothWindowLength.get() = 21;	// this is better for DNA
	g_uAnchorSpacing.get() = 96;
	WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength.get(), SmoothScore,
	  g_dSmoothScoreCeil.get());


	unsigned uBestColCount;
//	FindBestColsGrade(SmoothScore,uColCount,.85,BestCols,&uBestColCount);
	FindBestColsComboPP(uColCount, MatchScore, SmoothScore, g_dMinBestColScore.get(), g_dMinSmoothScore.get(),
	  BestCols, &uBestColCount);
/*
	std::cerr << "found " << uBestColCount << " anchor cols:\n";
	for( size_t colI = 0; colI < uBestColCount; colI++ )
	{
		if( colI > 0 )
			std::cerr << ", ";
		std::cerr << BestCols[colI];
	}
	std::cerr << std::endl;
*/

#if	TRACE
	ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);
#endif

	MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing.get(), AnchorCols,
	  ptruAnchorColCount);
/*
	std::cerr << "\n\nafter merging, have " << *ptruAnchorColCount << " anchor cols:\n";
	for( size_t colI = 0; colI < *ptruAnchorColCount; colI++ )
	{
		if( colI > 0 )
			std::cerr << ", ";
		std::cerr << AnchorCols[colI];
	}
	std::cerr << std::endl;
*/
	delete[] MatchScore;
	delete[] SmoothScore;
	delete[] BestCols;
	}
コード例 #5
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// The XP score is the sum of the score of each pair of
// sequences between two profiles which are aligned to
// each other. Notice that for two given profiles aligned
// in different ways, the difference in XP score must be
// the same as the difference in SP score because the
// score of a pair of sequences in one profile doesn't
// depend on the alignment.
SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)
	{
	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount1 != uColCount2)
		Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);

	const unsigned uSeqCount1 = msa1.GetSeqCount();
	const unsigned uSeqCount2 = msa2.GetSeqCount();

#if	TRACE
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif

	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;
			SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2);
			SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2);
			SCORE scorePair = scoreLetters + scoreGaps;
			scoreTotal += w1*w2*scorePair;
			++uPairCount;
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  >%s >%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  msa1.GetSeqName(uSeqIndex1),
			  msa2.GetSeqName(uSeqIndex2));
#endif
			}
		}
	if (0 == uPairCount)
		Quit("0 == uPairCount");

#if	TRACE
	Log("msa1=\n");
	msa1.LogMe();
	Log("msa2=\n");
	msa2.LogMe();
	Log("XP=%g\n", scoreTotal);
#endif
//	return scoreTotal / uPairCount;
	return scoreTotal;
	}
コード例 #6
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// Objective score defined as the dynamic programming score.
// Input is two alignments, which must be of the same length.
// Result is the same profile-profile score that is optimized
// by dynamic programming.
SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])
	{
	const unsigned uColCount = msa1.GetColCount();
	if (msa2.GetColCount() != uColCount)
		Quit("ObjScoreDP, must be same length");

	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();

	const ProfPos *PA = ProfileFromMSA(msa1);
	const ProfPos *PB = ProfileFromMSA(msa2);

	return ObjScoreDP_Profs(PA, PB, uColCount1, MatchScore);
	}
コード例 #7
0
ファイル: aligngivenpathsw.cpp プロジェクト: Wyss/mauve-py
static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
  const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
  unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexB, uColIndexCombined);
#endif
	const unsigned uLengthA = msaA.GetColCount();
	const unsigned uLengthB = msaB.GetColCount();

	unsigned uNewColCount = uColCountA;
	if (uColCountB > uNewColCount)
		uNewColCount = uColCountB;

	for (unsigned n = 0; n < uColCountA; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			{
			char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountA; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
		}

	for (unsigned n = 0; n < uColCountB; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			{
			char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountB; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
		}

	uColIndexCombined += uNewColCount;
	uColIndexA += uColCountA;
	uColIndexB += uColCountB;
	}
コード例 #8
0
ファイル: msa2.cpp プロジェクト: ggrekhov/ugene
// Return true if the given column has no gaps and all
// its residues are in the same biochemical group.
bool MSAColIsConservative(const MSA &msa, unsigned uColIndex)
	{
	extern unsigned ResidueGroup[];

	const unsigned uSeqCount = msa.GetColCount();
	if (0 == uSeqCount)
		Quit("MSAColIsConservative: empty alignment");

	if (msa.IsGap(0, uColIndex))
		return false;

	unsigned uLetter = msa.GetLetterEx(0, uColIndex);
    // cppcheck-suppress uninitvar
	const unsigned uGroup = ResidueGroup[uLetter];

	for (unsigned uSeqIndex = 1; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		if (msa.IsGap(uSeqIndex, uColIndex))
			return false;
		uLetter = msa.GetLetter(uSeqIndex, uColIndex);
		if (ResidueGroup[uLetter] != uGroup)
			return false;
		}
	return true;
	}
コード例 #9
0
ファイル: anchors.cpp プロジェクト: ggrekhov/ugene
void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
  unsigned *ptruAnchorColCount)
	{
    MuscleContext *ctx = getMuscleContext();

	const unsigned uColCount = msa.GetColCount();
	if (uColCount < 16)
		{
		*ptruAnchorColCount = 0;
		return;
		}

	SCORE *MatchScore = new SCORE[uColCount];
	SCORE *SmoothScore = new SCORE[uColCount];
	unsigned *BestCols = new unsigned[uColCount];

	GetLetterScores(msa, MatchScore);
	static_WindowSmooth(MatchScore, uColCount, ctx->params.g_uSmoothWindowLength, SmoothScore,
	  ctx->params.g_dSmoothScoreCeil);

	unsigned uBestColCount;
	FindBestColsCombo(msa, MatchScore, SmoothScore, ctx->params.g_dMinBestColScore, ctx->params.g_dMinSmoothScore,
	  BestCols, &uBestColCount);

#if	TRACE
	ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);
#endif

	MergeBestCols(MatchScore, BestCols, uBestColCount, ctx->params.g_uAnchorSpacing, AnchorCols,
	  ptruAnchorColCount);

	delete[] MatchScore;
	delete[] SmoothScore;
	delete[] BestCols;
	}
コード例 #10
0
ファイル: mhack.cpp プロジェクト: Unode/ext_apps
void MHackEnd(MSA &msa)
	{
	if (ALPHA_Amino != g_Alpha)
		return;
	if (0 == M)
		return;

	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uId = msa.GetSeqId(uSeqIndex);
		if (M[uId])
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
				{
				if (!msa.IsGap(uSeqIndex, uColIndex))
					{
					msa.SetChar(uSeqIndex, uColIndex, 'M');
					break;
					}
				}
			}
		}

	delete[] M;
	M = 0;
	}
コード例 #11
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
void StripGapColumns( MSA& msa )
	{
	unsigned uCurCol = 0;
	for( unsigned uColIndex = 0; uColIndex < msa.GetColCount(); uColIndex++ )
		{
		if( !msa.IsGapColumn(uColIndex) )
			{
			for( unsigned uGapSeq = 0; uGapSeq < msa.GetSeqCount(); uGapSeq++ )
				{
				msa.SetChar(uGapSeq, uCurCol, msa.GetChar(uGapSeq,uColIndex));
				}
			uCurCol++;
			}
		}
	msa.DeleteColumns(uCurCol, msa.GetColCount()-uCurCol);
	}
コード例 #12
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,
  MSA &msaOut)
	{
	const unsigned uSeqCount = msaIn.GetSeqCount();
	const unsigned uInColCount = msaIn.GetColCount();

	if (uFromColIndex + uColCount - 1 > uInColCount)
		Quit("MSAFromColRange, out of bounds");

	msaOut.SetSize(uSeqCount, uColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uSeqIndex);
		unsigned uId = msaIn.GetSeqId(uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);
		msaOut.SetSeqId(uSeqIndex, uId);

		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uSeqIndex, uFromColIndex + uColIndex);
			msaOut.SetChar(uSeqIndex, uColIndex, c);
			}
		}
	}
コード例 #13
0
ファイル: msa2.cpp プロジェクト: ggrekhov/ugene
// Append msa2 at the end of msa1
void MSAAppend(MSA &msa1, const MSA &msa2)
	{
	const unsigned uSeqCount = msa1.GetSeqCount();

	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uId = msa1.GetSeqId(uSeqIndex);
		unsigned uSeqIndex2 = msa2.GetSeqIndex(uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
			{
			const char c = msa2.GetChar(uSeqIndex2, uColIndex);
			msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
			}
		}
	}
コード例 #14
0
ファイル: MuscleUtils.cpp プロジェクト: ggrekhov/ugene
void convertMSA2MAlignment(MSA& msa, const DNAAlphabet* al, MultipleSequenceAlignment& res) {
    assert(res->isEmpty());
    MuscleContext *ctx = getMuscleContext();
    res->setAlphabet(al);
    ctx->output_uIds.clear();

    for(int i=0, n = msa.GetSeqCount(); i < n; i++) {
        QString name = msa.GetSeqName(i);
        QByteArray seq;
        seq.reserve(msa.GetColCount());
        for (int j = 0, m = msa.GetColCount(); j < m ; j++) {
            char c = msa.GetChar(i, j);
            seq.append(c);
        }
        ctx->output_uIds.append(ctx->tmp_uIds[msa.GetSeqId(i)]);
        res->addRow(name, seq);
    }
}
コード例 #15
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// Objective score defined as the sum of profile-sequence
// scores for each sequence in the alignment. The profile
// is computed from the entire alignment, so this includes
// the score of each sequence against itself. This is to
// avoid recomputing the profile each time, so we reduce
// complexity but introduce a questionable approximation.
// The goal is to see if we can exploit the apparent
// improvement in performance of log-expectation score
// over the usual sum-of-pairs by optimizing this
// objective score in the iterative refinement stage.
SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])
	{
	if (g_PPScore != PPSCORE_LE)
		Quit("FastScoreMSA_LASimple: LA");

	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();

	const ProfPos *Prof = ProfileFromMSA(msa);

	if (0 != MatchScore)
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			MatchScore[uColIndex] = 0;

	SCORE scoreTotal = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex);
		SCORE scoreSeq = 0;
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const ProfPos &PP = Prof[uColIndex];
			if (msa.IsGap(uSeqIndex, uColIndex))
				{
				bool bOpen = (0 == uColIndex ||
				  !msa.IsGap(uSeqIndex, uColIndex - 1));
				bool bClose = (uColCount - 1 == uColIndex ||
				  !msa.IsGap(uSeqIndex, uColIndex + 1));

				if (bOpen)
					scoreSeq += PP.m_scoreGapOpen;
				if (bClose)
					scoreSeq += PP.m_scoreGapClose;
				//if (!bOpen && !bClose)
				//	scoreSeq += PP.m_scoreGapExtend;
				}
			else if (msa.IsWildcard(uSeqIndex, uColIndex))
				continue;
			else
				{
				unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex);
				const SCORE scoreMatch = PP.m_AAScores[uLetter];
				if (0 != MatchScore)
					MatchScore[uColIndex] += weightSeq*scoreMatch;
				scoreSeq += scoreMatch;
				}
			}
		scoreTotal += weightSeq*scoreSeq;
		}

	delete[] Prof;
	return scoreTotal;
	}
コード例 #16
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void DeleteGappedCols(MSA &msa)
	{
	unsigned uColIndex = 0;
	for (;;)
		{
		if (uColIndex >= msa.GetColCount())
			break;
		if (msa.IsGapColumn(uColIndex))
			msa.DeleteCol(uColIndex);
		else
			++uColIndex;
		}
	}
コード例 #17
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
// "Catenate" two MSAs (by bad analogy with UNIX cat command).
// msa1 and msa2 must have same sequence names, but possibly
// in a different order.
// msaCat is the combined alignment produce by appending
// sequences in msa2 to sequences in msa1.
void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat)
	{
	const unsigned uSeqCount = msa1.GetSeqCount();

	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	const unsigned uColCountCat = uColCount1 + uColCount2;

	msaCat.SetSize(uSeqCount, uColCountCat);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		for (unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex)
			{
			const char c = msa1.GetChar(uSeqIndex, uColIndex);
			msaCat.SetChar(uSeqIndex, uColIndex, c);
			}

		const char *ptrSeqName = msa1.GetSeqName(uSeqIndex);
		unsigned uSeqIndex2;
		msaCat.SetSeqName(uSeqIndex, ptrSeqName);
		bool bFound = msa2.GetSeqIndex(ptrSeqName, &uSeqIndex2);
		if (bFound)
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				{
				const char c = msa2.GetChar(uSeqIndex2, uColIndex);
				msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
				}
			}
		else
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');
			}
		}
	}
コード例 #18
0
ファイル: diffobjscore.cpp プロジェクト: ggrekhov/ugene
static SCORE ScoreLetters(const MSA &msa, const unsigned Edges[],
  unsigned uEdgeCount)
	{
	const unsigned uColCount = msa.GetColCount();

// Letters
	SCORE Score = 0;
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const unsigned uColIndex = Edges[uEdgeIndex];
		assert(uColIndex < uColCount);
		Score += ScoreColLetters(msa, uColIndex);
		}
	return Score;
	}
コード例 #19
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromSeqRange(const MSA &msaIn, unsigned uFromSeqIndex, unsigned uSeqCount,
  MSA &msaOut)
	{
	const unsigned uColCount = msaIn.GetColCount();
	msaOut.SetSize(uSeqCount, uColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uFromSeqIndex + uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);

		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uFromSeqIndex + uSeqIndex, uColIndex);
			msaOut.SetChar(uSeqIndex, uColIndex, c);
			}
		}
	}
コード例 #20
0
ファイル: stabilize.cpp プロジェクト: bigmuscle/bigmuscle
void Stabilize(const MSA &msa, MSA &msaStable)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();

	msaStable.SetSize(uSeqCount, uColCount);
	for (unsigned uId = 0; uId < uSeqCount; ++uId)
		{
		const unsigned uSeqIndex = msa.GetSeqIndex(uId);
		msaStable.SetSeqName(uId, msa.GetSeqName(uSeqIndex));
		msaStable.SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msa.GetChar(uSeqIndex, uColIndex);
			msaStable.SetChar(uId, uColIndex, c);
			}
		}
	}
コード例 #21
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,
  MSA &msaOut)
	{
	const unsigned uColCount = msaIn.GetColCount();
	msaOut.SetSize(uSeqCount, uColCount);
	for (unsigned uSeqIndexOut = 0; uSeqIndexOut < uSeqCount; ++uSeqIndexOut)
		{
		unsigned uSeqIndexIn = uSeqIndexes[uSeqIndexOut];
		const char *ptrName = msaIn.GetSeqName(uSeqIndexIn);
		unsigned uId = msaIn.GetSeqId(uSeqIndexIn);
		msaOut.SetSeqName(uSeqIndexOut, ptrName);
		msaOut.SetSeqId(uSeqIndexOut, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uSeqIndexIn, uColIndex);
			msaOut.SetChar(uSeqIndexOut, uColIndex, c);
			}
		}
	}
コード例 #22
0
ファイル: scoredist.cpp プロジェクト: Unode/ext_apps
// Similarity score
static double Sigma(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2,
  unsigned *ptrLength)
	{
	unsigned Length = 0;
	double Score = 0;
	const unsigned ColCount = msa.GetColCount();
	for (unsigned ColIndex = 0; ColIndex < ColCount; ++ColIndex)
		{
		unsigned Letter1 = msa.GetLetterEx(SeqIndex1, ColIndex);
		unsigned Letter2 = msa.GetLetterEx(SeqIndex2, ColIndex);
		if (Letter1 >= 20 || Letter2 >= 20)
			continue;
		++Length;
		Score += BLOSUM62[Letter1][Letter2];
		}

	*ptrLength = Length;
	return Score;
	}
コード例 #23
0
ファイル: anchors.cpp プロジェクト: ggrekhov/ugene
// Best col only if all following criteria satisfied:
// (1) Score >= min
// (2) Smoothed score >= min
// (3) No gaps.
static void FindBestColsCombo(const MSA &msa, const SCORE Score[],
  const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,
  unsigned BestCols[], unsigned *ptruBestColCount)
	{
	const unsigned uColCount = msa.GetColCount();

	unsigned uBestColCount = 0;
	for (unsigned uIndex = 0; uIndex < uColCount; ++uIndex)
		{
		if (Score[uIndex] < dMinScore)
			continue;
		if (SmoothScore[uIndex] < dMinSmoothScore)
			continue;
		if (msa.ColumnHasGap(uIndex))
			continue;
		BestCols[uBestColCount] = uIndex;
		++uBestColCount;
		}
	*ptruBestColCount = uBestColCount;
	}
コード例 #24
0
ファイル: writescorefile.cpp プロジェクト: ggrekhov/ugene
void WriteScoreFile(const MSA &msa)
	{
    MuscleContext *ctx = getMuscleContext();

	FILE *f = fopen(ctx->params.g_pstrScoreFileName, "w");
	if (0 == f)
		Quit("Cannot open score file '%s' errno=%d", ctx->params.g_pstrScoreFileName, errno);

	const unsigned uColCount = msa.GetColCount();
	const unsigned uSeqCount = msa.GetSeqCount();
	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
		{
		double Score = GetColScore(msa, uCol);
		fprintf(f, "%10.3f  ", Score);
		for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
			{
			char c = msa.GetChar(uSeq, uCol);
			fprintf(f, "%c", c);
			}
		fprintf(f, "\n");
		}
	fclose(f);
	}
コード例 #25
0
ファイル: scoregaps.cpp プロジェクト: ggrekhov/ugene
// TODO: This could be much faster, no need to look
// at all columns.
static void FindIntersectingGaps(const MSA &msa, unsigned SeqIndex)
	{
    MuscleContext *ctx = getMuscleContext();
    GAPINFO** &g_Gaps = ctx->scoregaps.g_Gaps;
    bool* &g_ColDiff = ctx->scoregaps.g_ColDiff;
	const unsigned ColCount = msa.GetColCount();
	bool InGap = false;
	bool Intersects = false;
	unsigned Start = uInsane;
	for (unsigned Col = 0; Col <= ColCount; ++Col)
		{
		bool Gap = ((Col != ColCount) && msa.IsGap(SeqIndex, Col));
		if (Gap)
			{
			if (!InGap)
				{
				InGap = true;
				Start = Col;
				}
			if (g_ColDiff[Col])
				Intersects = true;
			}
		else if (InGap)
			{
			InGap = false;
			if (Intersects)
				{
				GAPINFO *GI = NewGapInfo();
				GI->Start = Start;
				GI->End = Col - 1;
				GI->Next = g_Gaps[SeqIndex];
				g_Gaps[SeqIndex] = GI;
				}
			Intersects = false;
			}
		}
	}
コード例 #26
0
ファイル: diffobjscore.cpp プロジェクト: ggrekhov/ugene
void GetLetterScores(const MSA &msa, SCORE Scores[])
	{
	const unsigned uColCount = msa.GetColCount();
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		Scores[uColIndex] = ScoreColLetters(msa, uColIndex);
	}
コード例 #27
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairGaps, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairGaps\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		if (bGap1 && bGap2)
			continue;

		if (bGap1)
			{
			if (!bGapping1)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 1 col %d\n", uColIndex);
#endif
				if (uColIndex == uColStart)
					scoreGaps += TermGapScore(true);
				else
					scoreGaps += g_scoreGapOpen;
				bGapping1 = true;
				}
			else
				scoreGaps += g_scoreGapExtend;
			continue;
			}

		else if (bGap2)
			{
			if (!bGapping2)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 2 col %d\n", uColIndex);
#endif
				if (uColIndex == uColStart)
					scoreGaps += TermGapScore(true);
				else
					scoreGaps += g_scoreGapOpen;
				bGapping2 = true;
				}
			else
				scoreGaps += g_scoreGapExtend;
			continue;
			}

		bGapping1 = false;
		bGapping2 = false;
		}

	if (bGapping1 || bGapping2)
		{
		scoreGaps -= g_scoreGapOpen;
		scoreGaps += TermGapScore(true);
		}
	return scoreGaps;
	}
コード例 #28
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairLetters, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairLetters\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreLetters = 0;
	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);
		if (uLetter1 >= g_AlphaSize)
			continue;
		unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);
		if (uLetter2 >= g_AlphaSize)
			continue;

		SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
		scoreLetters += scoreMatch;
		}
	return scoreLetters;
	}
コード例 #29
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])
	{
#if	TRACE
	Log("==================ObjScoreSP==============\n");
	Log("msa=\n");
	msa.LogMe();
#endif
	g_SPScoreLetters = 0;
	g_SPScoreGaps = 0;

	if (0 != MatchScore)
		{
		const unsigned uColCount = msa.GetColCount();
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			MatchScore[uColIndex] = 0;
		}

	const unsigned uSeqCount = msa.GetSeqCount();
	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
#if	TRACE
	Log("Seq1  Seq2     wt1     wt2    Letters         Gaps  Unwt.Score    Wt.Score       Total\n");
	Log("----  ----  ------  ------  ----------  ----------  ----------  ----------  ----------\n");
#endif
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;

			SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);
			SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);
			SCORE scorePair = scoreLetters + scoreGaps;
			++uPairCount;

			scoreTotal += w*scorePair;

			g_SPScoreLetters += w*scoreLetters;
			g_SPScoreGaps += w*scoreGaps;
#if	TRACE
			Log("%4d  %4d  %6.3f  %6.3f  %10.2f  %10.2f  %10.2f  %10.2f  %10.2f >%s >%s\n",
			  uSeqIndex1,
			  uSeqIndex2,
			  w1,
			  w2,
			  scoreLetters,
			  scoreGaps,
			  scorePair,
			  scorePair*w1*w2,
			  scoreTotal,
			  msa.GetSeqName(uSeqIndex1),
			  msa.GetSeqName(uSeqIndex2));
#endif
			}
		}
#if	TEST_SPFAST
	{
	SCORE f = ObjScoreSPFast(msa);
	Log("Fast  = %.6g\n", f);
	Log("Brute = %.6g\n", scoreTotal);
	if (BTEq(f, scoreTotal))
		Log("Agree\n");
	else
		Log("** DISAGREE **\n");
	}
#endif
//	return scoreTotal / uPairCount;
	return scoreTotal;
	}
コード例 #30
0
ファイル: objscoreda.cpp プロジェクト: Wyss/mauve-py
static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps)
	{
	g_ptrMSA1.get() = &msa1;
	g_ptrMSA2.get() = &msa2;
	g_uSeqIndex1.get() = uSeqIndex1;
	g_uSeqIndex2.get() = uSeqIndex2;

	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPair, different lengths");

#if	TRACE
	Log("ScoreSeqPair\n");
	Log("%16.16s  ", msa1.GetSeqName(uSeqIndex1));
	for (unsigned i = 0; i < uColCount; ++i)
		Log("%c", msa1.GetChar(uSeqIndex1, i));
	Log("\n");
	Log("%16.16s  ", msa2.GetSeqName(uSeqIndex2));
	for (unsigned i = 0; i < uColCount; ++i)
		Log("%c", msa1.GetChar(uSeqIndex2, i));
	Log("\n");
#endif

	SCORE scoreTotal = 0;

// Substitution scores
	unsigned uFirstLetter1 = uInsane;
	unsigned uFirstLetter2 = uInsane;
	unsigned uLastLetter1 = uInsane;
	unsigned uLastLetter2 = uInsane;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex);
		bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex);

		if (!bGap1)
			{
			if (uInsane == uFirstLetter1)
				uFirstLetter1 = uColIndex;
			uLastLetter1 = uColIndex;
			}
		if (!bGap2)
			{
			if (uInsane == uFirstLetter2)
				uFirstLetter2 = uColIndex;
			uLastLetter2 = uColIndex;
			}

		if (bGap1 || bGap2 || bWildcard1 || bWildcard2)
			continue;

		unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex);
		unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex);

		SCORE scoreMatch = (*g_ptrScoreMatrix.get())[uLetter1][uLetter2];
		scoreTotal += scoreMatch;
#if	TRACE
		Log("%c <-> %c = %7.1f  %10.1f\n",
		  msa1.GetChar(uSeqIndex1, uColIndex),
		  msa2.GetChar(uSeqIndex2, uColIndex),
		  scoreMatch,
		  scoreTotal);
#endif
		}
	
	*ptrLetters = scoreTotal;

// Gap penalties
	unsigned uGapLength = uInsane;
	unsigned uGapStartCol = uInsane;
	bool bGapping1 = false;
	bool bGapping2 = false;

	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		if (bGap1 && bGap2)
			continue;

		if (bGapping1)
			{
			if (bGap1)
				++uGapLength;
			else
				{
				bGapping1 = false;
				bool bNTerm = (uFirstLetter2 == uGapStartCol);
				bool bCTerm = (uLastLetter2 + 1 == uColIndex);
				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
				scoreTotal += scoreGap;
#if	TRACE
				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
				Log("GAP         %7.1f  %10.1f\n",
				  scoreGap,
				  scoreTotal);
#endif
				}
			continue;
			}
		else
			{
			if (bGap1)
				{
				uGapStartCol = uColIndex;
				bGapping1 = true;
				uGapLength = 1;
				continue;
				}
			}

		if (bGapping2)
			{
			if (bGap2)
				++uGapLength;
			else
				{
				bGapping2 = false;
				bool bNTerm = (uFirstLetter1 == uGapStartCol);
				bool bCTerm = (uLastLetter1 + 1 == uColIndex);
				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
				scoreTotal += scoreGap;
#if	TRACE
				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
				Log("GAP         %7.1f  %10.1f\n",
				  scoreGap,
				  scoreTotal);
#endif
				}
			}
		else
			{
			if (bGap2)
				{
				uGapStartCol = uColIndex;
				bGapping2 = true;
				uGapLength = 1;
				}
			}
		}

	if (bGapping1 || bGapping2)
		{
		SCORE scoreGap = GapPenalty(uGapLength, true);
		scoreTotal += scoreGap;
#if	TRACE
		LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true);
		Log("GAP         %7.1f  %10.1f\n",
		  scoreGap,
		  scoreTotal);
#endif
		}
	*ptrGaps = scoreTotal - *ptrLetters;
	return scoreTotal;
	}