コード例 #1
0
ファイル: refinew.cpp プロジェクト: Wyss/mauve-py
static void SeqVectFromMSACols(const MSA &msa, unsigned uColFrom, unsigned uColTo,
  SeqVect &v)
	{
	v.Clear();
	const unsigned uSeqCount = msa.GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq s;
		SeqFromMSACols(msa, uSeqIndex, uColFrom, uColTo, s);
		v.AppendSeq(s);
		}
	}
コード例 #2
0
ファイル: estring.cpp プロジェクト: Unode/ext_apps
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
	{
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(es, &uSymbols, &uIndels);
	assert(sIn.Length() == uSymbols);

	unsigned uColCount = uSymbols + uIndels;

	a.Clear();
	a.SetSize(1, uColCount);

	a.SetSeqName(0, sIn.GetName());
	a.SetSeqId(0, sIn.GetId());

	unsigned p = 0;
	unsigned uColIndex = 0;
	for (;;)
		{
		int n = *es++;
		if (0 == n)
			break;
		if (n > 0)
			for (int i = 0; i < n; ++i)
				{
				char c = sIn[p++];
				a.SetChar(0, uColIndex++, c);
				}
		else
			for (int i = 0; i < -n; ++i)
				a.SetChar(0, uColIndex++, '-');
		}
	assert(uColIndex == uColCount);
	return uColCount;
	}
コード例 #3
0
ファイル: stabilize.cpp プロジェクト: bigmuscle/bigmuscle
void Stabilize(const MSA &msa, MSA &msaStable)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();

	msaStable.SetSize(uSeqCount, uColCount);
	for (unsigned uId = 0; uId < uSeqCount; ++uId)
		{
		const unsigned uSeqIndex = msa.GetSeqIndex(uId);
		msaStable.SetSeqName(uId, msa.GetSeqName(uSeqIndex));
		msaStable.SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msa.GetChar(uSeqIndex, uColIndex);
			msaStable.SetChar(uId, uColIndex, c);
			}
		}
	}
コード例 #4
0
ファイル: aligngivenpathsw.cpp プロジェクト: Wyss/mauve-py
static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
  const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
  unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexB, uColIndexCombined);
#endif
	const unsigned uLengthA = msaA.GetColCount();
	const unsigned uLengthB = msaB.GetColCount();

	unsigned uNewColCount = uColCountA;
	if (uColCountB > uNewColCount)
		uNewColCount = uColCountB;

	for (unsigned n = 0; n < uColCountA; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			{
			char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountA; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
		}

	for (unsigned n = 0; n < uColCountB; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			{
			char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountB; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
		}

	uColIndexCombined += uNewColCount;
	uColIndexA += uColCountA;
	uColIndexB += uColCountB;
	}
コード例 #5
0
ファイル: muscleout.cpp プロジェクト: Unode/ext_apps
void MuscleOutput(MSA &msa)
	{
	MHackEnd(msa);
	if (g_bStable)
		{
		MSA msaStable;
		Stabilize(msa, msaStable);
		msa.Clear();	// save memory
		DoOutput(msaStable);
		}
	else
		DoOutput(msa);
	}
コード例 #6
0
ファイル: MuscleUtils.cpp プロジェクト: ggrekhov/ugene
void prepareAlignResults(MSA& msa, const DNAAlphabet* al, MultipleSequenceAlignment& ma, bool mhack) {
    if (mhack) {
        MHackEnd(msa);
    }
    MuscleContext* ctx = getMuscleContext();
    if (ctx->params.g_bStable) {
        MSA msaStable;
        Stabilize(msa, msaStable);
        msa.Clear();
        convertMSA2MAlignment(msaStable, al, ma);
    } else {
        convertMSA2MAlignment(msa, al, ma);
    }
}
コード例 #7
0
ファイル: writescorefile.cpp プロジェクト: ggrekhov/ugene
void WriteScoreFile(const MSA &msa)
	{
    MuscleContext *ctx = getMuscleContext();

	FILE *f = fopen(ctx->params.g_pstrScoreFileName, "w");
	if (0 == f)
		Quit("Cannot open score file '%s' errno=%d", ctx->params.g_pstrScoreFileName, errno);

	const unsigned uColCount = msa.GetColCount();
	const unsigned uSeqCount = msa.GetSeqCount();
	for (unsigned uCol = 0; uCol < uColCount; ++uCol)
		{
		double Score = GetColScore(msa, uCol);
		fprintf(f, "%10.3f  ", Score);
		for (unsigned uSeq = 0; uSeq < uSeqCount; ++uSeq)
			{
			char c = msa.GetChar(uSeq, uCol);
			fprintf(f, "%c", c);
			}
		fprintf(f, "\n");
		}
	fclose(f);
	}
コード例 #8
0
ファイル: scoregaps.cpp プロジェクト: ggrekhov/ugene
// TODO: This could be much faster, no need to look
// at all columns.
static void FindIntersectingGaps(const MSA &msa, unsigned SeqIndex)
	{
    MuscleContext *ctx = getMuscleContext();
    GAPINFO** &g_Gaps = ctx->scoregaps.g_Gaps;
    bool* &g_ColDiff = ctx->scoregaps.g_ColDiff;
	const unsigned ColCount = msa.GetColCount();
	bool InGap = false;
	bool Intersects = false;
	unsigned Start = uInsane;
	for (unsigned Col = 0; Col <= ColCount; ++Col)
		{
		bool Gap = ((Col != ColCount) && msa.IsGap(SeqIndex, Col));
		if (Gap)
			{
			if (!InGap)
				{
				InGap = true;
				Start = Col;
				}
			if (g_ColDiff[Col])
				Intersects = true;
			}
		else if (InGap)
			{
			InGap = false;
			if (Intersects)
				{
				GAPINFO *GI = NewGapInfo();
				GI->Start = Start;
				GI->End = Col - 1;
				GI->Next = g_Gaps[SeqIndex];
				g_Gaps[SeqIndex] = GI;
				}
			Intersects = false;
			}
		}
	}
コード例 #9
0
ファイル: diffobjscore.cpp プロジェクト: ggrekhov/ugene
static SCORE ScoreLetters(const MSA &msa, const unsigned Edges[],
  unsigned uEdgeCount)
	{
	const unsigned uColCount = msa.GetColCount();

// Letters
	SCORE Score = 0;
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const unsigned uColIndex = Edges[uEdgeIndex];
		assert(uColIndex < uColCount);
		Score += ScoreColLetters(msa, uColIndex);
		}
	return Score;
	}
コード例 #10
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
// Return true if the given column has no gaps and all
// its residues are in the same biochemical group.
bool MSAColIsConservative(const MSA &msa, unsigned uColIndex)
	{
	extern unsigned ResidueGroup[];

	const unsigned uSeqCount = msa.GetColCount();
	if (0 == uSeqCount)
		Quit("MSAColIsConservative: empty alignment");

	if (msa.IsGap(0, uColIndex))
		return false;

	unsigned uLetter = msa.GetLetterEx(0, uColIndex);
	const unsigned uGroup = ResidueGroup[uLetter];

	for (unsigned uSeqIndex = 1; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		if (msa.IsGap(uSeqIndex, uColIndex))
			return false;
		uLetter = msa.GetLetter(uSeqIndex, uColIndex);
		if (ResidueGroup[uLetter] != uGroup)
			return false;
		}
	return true;
	}
コード例 #11
0
ファイル: MuscleUtils.cpp プロジェクト: ggrekhov/ugene
void convertMAlignment2MSA(MSA& muscleMSA, const MultipleSequenceAlignment& ma, bool fixAlpha) {    
    MuscleContext *ctx = getMuscleContext();
    ctx->fillUidsVectors(ma->getNumRows());
    for (int i=0, n = ma->getNumRows(); i<n; i++) {
        const MultipleSequenceAlignmentRow row = ma->getMsaRow(i);
        
        int coreLen = row->getCoreLength();
        int maLen = ma->getLength();
        char* seq  = new char[maLen + 1];
        memcpy(seq, row->getCore().constData(), coreLen);
        memset(seq + coreLen, '-', maLen - coreLen + 1);
        seq[maLen] = 0;

        char* name = new char[row->getName().length() + 1];
        memcpy(name, row->getName().toLocal8Bit().constData(), row->getName().length());
        name[row->getName().length()] = '\0';
        
        muscleMSA.AppendSeq(seq, maLen, name);
        ctx->tmp_uIds[i] = ctx->input_uIds[i];
    }
    if (fixAlpha) {
        muscleMSA.FixAlpha();
    }
}
コード例 #12
0
ファイル: dosp.cpp プロジェクト: ggrekhov/ugene
void DoSP()
	{
    MuscleContext *ctx = getMuscleContext();

	TextFile f(ctx->params.g_pstrSPFileName);

	MSA a;
	a.FromFile(f);

	ALPHA Alpha = ALPHA_Undefined;
	switch (ctx->params.g_SeqType)
		{
	case SEQTYPE_Auto:
		Alpha = a.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid SeqType");
		}
	SetAlpha(Alpha);
	a.FixAlpha();

	SetPPScore();

	const unsigned uSeqCount = a.GetSeqCount();
	if (0 == uSeqCount)
		Quit("No sequences in input file %s", ctx->params.g_pstrSPFileName);

	MSA::SetIdCount(uSeqCount);
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		a.SetSeqId(uSeqIndex, uSeqIndex);

	SetSeqWeightMethod(ctx->params.g_SeqWeight1);
	Tree tree;
	TreeFromMSA(a, tree, ctx->params.g_Cluster2, ctx->params.g_Distance2, ctx->params.g_Root2);
	SetMuscleTree(tree);
	SetMSAWeightsMuscle((MSA &) a);

	SCORE SP = ObjScoreSP(a);

	Log("File=%s;SP=%.4g\n", ctx->params.g_pstrSPFileName, SP);
	fprintf(stderr, "File=%s;SP=%.4g\n", ctx->params.g_pstrSPFileName, SP);
	}
コード例 #13
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,
  MSA &msaOut)
	{
	const unsigned uColCount = msaIn.GetColCount();
	msaOut.SetSize(uSeqCount, uColCount);
	for (unsigned uSeqIndexOut = 0; uSeqIndexOut < uSeqCount; ++uSeqIndexOut)
		{
		unsigned uSeqIndexIn = uSeqIndexes[uSeqIndexOut];
		const char *ptrName = msaIn.GetSeqName(uSeqIndexIn);
		unsigned uId = msaIn.GetSeqId(uSeqIndexIn);
		msaOut.SetSeqName(uSeqIndexOut, ptrName);
		msaOut.SetSeqId(uSeqIndexOut, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uSeqIndexIn, uColIndex);
			msaOut.SetChar(uSeqIndexOut, uColIndex, c);
			}
		}
	}
コード例 #14
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
void StripGapColumns( MSA& msa )
	{
	unsigned uCurCol = 0;
	for( unsigned uColIndex = 0; uColIndex < msa.GetColCount(); uColIndex++ )
		{
		if( !msa.IsGapColumn(uColIndex) )
			{
			for( unsigned uGapSeq = 0; uGapSeq < msa.GetSeqCount(); uGapSeq++ )
				{
				msa.SetChar(uGapSeq, uCurCol, msa.GetChar(uGapSeq,uColIndex));
				}
			uCurCol++;
			}
		}
	msa.DeleteColumns(uCurCol, msa.GetColCount()-uCurCol);
	}
コード例 #15
0
ファイル: msa2.cpp プロジェクト: ggrekhov/ugene
// Append msa2 at the end of msa1
void MSAAppend(MSA &msa1, const MSA &msa2)
	{
	const unsigned uSeqCount = msa1.GetSeqCount();

	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uId = msa1.GetSeqId(uSeqIndex);
		unsigned uSeqIndex2 = msa2.GetSeqIndex(uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
			{
			const char c = msa2.GetChar(uSeqIndex2, uColIndex);
			msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
			}
		}
	}
コード例 #16
0
ファイル: aligngivenpathsw.cpp プロジェクト: ggrekhov/ugene
static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
  const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
  unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
	{
    MuscleContext *ctx = getMuscleContext();
    char *g_UnalignChar = ctx->alpha.g_UnalignChar;
#if	TRACE
	Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexB, uColIndexCombined);
#endif

	unsigned uNewColCount = uColCountA;
	if (uColCountB > uNewColCount)
		uNewColCount = uColCountB;

	for (unsigned n = 0; n < uColCountA; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			{
			char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountA; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
		}

	for (unsigned n = 0; n < uColCountB; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			{
			char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountB; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
		}

	uColIndexCombined += uNewColCount;
	uColIndexA += uColCountA;
	uColIndexB += uColCountB;
	}
コード例 #17
0
ファイル: msa2.cpp プロジェクト: ggrekhov/ugene
void SetThreeWayWeightsMuscle(MSA &msa)
	{
    MuscleContext *ctx =getMuscleContext();
    const Tree* &g_ptrMuscleTree = ctx->msa2.g_ptrMuscleTree;
    unsigned &g_uTreeSplitNode1 = ctx->msa2.g_uTreeSplitNode1;
    unsigned &g_uTreeSplitNode2 = ctx->msa2.g_uTreeSplitNode2;

	if (NULL_NEIGHBOR == g_uTreeSplitNode1 || NULL_NEIGHBOR == g_uTreeSplitNode2)
		{
		msa.SetHenikoffWeightsPB();
		return;
		}

	const unsigned uMuscleSeqCount = g_ptrMuscleTree->GetLeafCount();
	WEIGHT *Weights = new WEIGHT[uMuscleSeqCount];

	CalcThreeWayWeights(*g_ptrMuscleTree, g_uTreeSplitNode1, g_uTreeSplitNode2,
	  Weights);

	const unsigned uMSASeqCount = msa.GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uMSASeqCount; ++uSeqIndex)
		{
		const unsigned uId = msa.GetSeqId(uSeqIndex);
		if (uId >= uMuscleSeqCount)
			Quit("SetThreeWayWeightsMuscle: id out of range");
		msa.SetSeqWeight(uSeqIndex, Weights[uId]);
		}
#if	LOCAL_VERBOSE
	{
	Log("SetThreeWayWeightsMuscle\n");
	for (unsigned n = 0; n < uMSASeqCount; ++n)
		{
		const unsigned uId = msa.GetSeqId(n);
		Log("%20.20s %6.3f\n", msa.GetSeqName(n), Weights[uId]);
		}
	}
#endif
	msa.NormalizeWeights((WEIGHT) 1.0);

	delete[] Weights;
	}
コード例 #18
0
ファイル: MuscleUtils.cpp プロジェクト: ggrekhov/ugene
void convertMSA2MAlignment(MSA& msa, const DNAAlphabet* al, MultipleSequenceAlignment& res) {
    assert(res->isEmpty());
    MuscleContext *ctx = getMuscleContext();
    res->setAlphabet(al);
    ctx->output_uIds.clear();

    for(int i=0, n = msa.GetSeqCount(); i < n; i++) {
        QString name = msa.GetSeqName(i);
        QByteArray seq;
        seq.reserve(msa.GetColCount());
        for (int j = 0, m = msa.GetColCount(); j < m ; j++) {
            char c = msa.GetChar(i, j);
            seq.append(c);
        }
        ctx->output_uIds.append(ctx->tmp_uIds[msa.GetSeqId(i)]);
        res->addRow(name, seq);
    }
}
コード例 #19
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromSeqRange(const MSA &msaIn, unsigned uFromSeqIndex, unsigned uSeqCount,
  MSA &msaOut)
	{
	const unsigned uColCount = msaIn.GetColCount();
	msaOut.SetSize(uSeqCount, uColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uFromSeqIndex + uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);

		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uFromSeqIndex + uSeqIndex, uColIndex);
			msaOut.SetChar(uSeqIndex, uColIndex, c);
			}
		}
	}
コード例 #20
0
ファイル: objscoreda.cpp プロジェクト: Wyss/mauve-py
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
#if	TRACE
	msa.LogMe();
	Log("     Score  Weight  Weight       Total\n");
	Log("----------  ------  ------  ----------\n");
#endif
	SCORE TotalLetters = 0;
	SCORE TotalGaps = 0;
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;
			SCORE Letters;
			SCORE Gaps;
			SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2,
			  &Letters, &Gaps);
			scoreTotal += w1*w2*scorePair;
			TotalLetters += w1*w2*Letters;
			TotalGaps += w1*w2*Gaps;
			++uPairCount;
#if	TRACE
			Log("%10.2f  %6.3f  %6.3f  %10.2f  %d=%s %d=%s\n",
			  scorePair,
			  w1,
			  w2,
			  scorePair*w1*w2,
			  uSeqIndex1,
			  msa.GetSeqName(uSeqIndex1),
			  uSeqIndex2,
			  msa.GetSeqName(uSeqIndex2));
#endif
			}
		}
	*ptrLetters = TotalLetters;
	*ptrGaps = TotalGaps;
	return scoreTotal;
	}
コード例 #21
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairLetters, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairLetters\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreLetters = 0;
	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);
		if (uLetter1 >= g_AlphaSize)
			continue;
		unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);
		if (uLetter2 >= g_AlphaSize)
			continue;

		SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
		scoreLetters += scoreMatch;
		}
	return scoreLetters;
	}
コード例 #22
0
ファイル: domuscle.cpp プロジェクト: bigmuscle/bigmuscle
void DoMuscle(CompositeVect*CVLocation)
	{
	SetOutputFileName(g_pstrOutFileName);
	SetInputFileName(g_pstrInFileName);

	SetMaxIters(g_uMaxIters);
	SetSeqWeightMethod(g_SeqWeight1);

	TextFile fileIn(g_pstrInFileName);
	SeqVect v;
	v.FromFASTAFile(fileIn);
	const unsigned uSeqCount = v.Length();

	if (0 == uSeqCount)
		Quit("No sequences in input file");

	ALPHA Alpha = ALPHA_Undefined;
	switch (g_SeqType)
		{
	case SEQTYPE_Auto:
		Alpha = v.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid seq type");
		}
	SetAlpha(Alpha);
	v.FixAlpha();

	PTR_SCOREMATRIX UserMatrix = 0;
	if (0 != g_pstrMatrixFileName)
		{
		const char *FileName = g_pstrMatrixFileName;
		const char *Path = getenv("MUSCLE_MXPATH");
		if (Path != 0)
			{
			size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
			char *NewFileName = new char[n];
			sprintf(NewFileName, "%s/%s", Path, FileName);
			FileName = NewFileName;
			}
		TextFile File(FileName);
		UserMatrix = ReadMx(File);
		g_Alpha = ALPHA_Amino;
		g_PPScore = PPSCORE_SP;
		}

	SetPPScore();

	if (0 != UserMatrix)
		g_ptrScoreMatrix = UserMatrix;

	unsigned uMaxL = 0;
	unsigned uTotL = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned L = v.GetSeq(uSeqIndex).Length();
		uTotL += L;
		if (L > uMaxL)
			uMaxL = L;
		}

	SetIter(1);
	g_bDiags = g_bDiags1;
	SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);

	SetMuscleSeqVect(v);

	MSA::SetIdCount(uSeqCount);

// Initialize sequence ids.
// From this point on, ids must somehow propogate from here.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		v.SetSeqId(uSeqIndex, uSeqIndex);

	if (0 == uSeqCount)
		Quit("Input file '%s' has no sequences", g_pstrInFileName);
	if (1 == uSeqCount)
		{
		TextFile fileOut(g_pstrOutFileName, true);
		v.ToFile(fileOut);
		return;
		}

	if (uSeqCount > 1)
		MHackStart(v);

// First iteration
	Tree GuideTree;
	if (0 != g_pstrUseTreeFileName)
	{
	// Discourage users...
		if (!g_bUseTreeNoWarn)
			fprintf(stderr, "%s", g_strUseTreeWarning);

	// Read tree from file
		TextFile TreeFile(g_pstrUseTreeFileName);
		GuideTree.FromFile(TreeFile);

	// Make sure tree is rooted
		if (!GuideTree.IsRooted())
			Quit("User tree must be rooted");

		if (GuideTree.GetLeafCount() != uSeqCount)
			Quit("User tree does not match input sequences");

		const unsigned uNodeCount = GuideTree.GetNodeCount();
		for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
			{
			if (!GuideTree.IsLeaf(uNodeIndex))
				continue;
			const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
			unsigned uSeqIndex;
			bool SeqFound = v.FindName(LeafName, &uSeqIndex);
			if (!SeqFound)
				Quit("Label %s in tree does not match sequences", LeafName);
			unsigned uId = v.GetSeqIdFromName(LeafName);
			GuideTree.SetLeafId(uNodeIndex, uId);
			}
		}
	else
		TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,
		  g_pstrDistMxFileName1);

	const char *Tree1 = ValueOpt("Tree1");
	if (0 != Tree1)
		{
		TextFile f(Tree1, true);
		GuideTree.ToFile(f);
		if (g_bClusterOnly)
			return;
		}

	SetMuscleTree(GuideTree);
	ValidateMuscleIds(GuideTree);

	MSA msa;
	msa.SetCompositeVector(CVLocation);
	ProgNode *ProgNodes = 0;
	if (g_bLow)
		ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
	else
		ProgressiveAlign(v, GuideTree, msa);
	SetCurrentAlignment(msa);

	if (0 != g_pstrComputeWeightsFileName)
		{
		extern void OutWeights(const char *FileName, const MSA &msa);
		SetMSAWeightsMuscle(msa);
		OutWeights(g_pstrComputeWeightsFileName, msa);
		return;
		}

	ValidateMuscleIds(msa);

	if (1 == g_uMaxIters || 2 == uSeqCount)
		{
		//TextFile fileOut(g_pstrOutFileName, true);
		//MHackEnd(msa);
		//msa.ToFile(fileOut);
		MuscleOutput(msa);
		return;
		}

	if (0 == g_pstrUseTreeFileName)
		{
		g_bDiags = g_bDiags2;
		SetIter(2);

		if (g_bLow)
			{
			if (0 != g_uMaxTreeRefineIters)
				RefineTreeE(msa, v, GuideTree, ProgNodes);
			}
		else
			RefineTree(msa, GuideTree);

		const char *Tree2 = ValueOpt("Tree2");
		if (0 != Tree2)
			{
			TextFile f(Tree2, true);
			GuideTree.ToFile(f);
			}
		}

	SetSeqWeightMethod(g_SeqWeight2);
	SetMuscleTree(GuideTree);

	if (g_bAnchors)
		RefineVert(msa, GuideTree, g_uMaxIters - 2);
	else
		RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);

#if	0
// Refining by subfamilies is disabled as it didn't give better
// results. I tried doing this before and after RefineHoriz.
// Should get back to this as it seems like this should work.
	RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
#endif

	ValidateMuscleIds(msa);
	ValidateMuscleIds(GuideTree);

	//TextFile fileOut(g_pstrOutFileName, true);
	//MHackEnd(msa);
	//msa.ToFile(fileOut);
	MuscleOutput(msa);
	}
コード例 #23
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
// "Catenate" two MSAs (by bad analogy with UNIX cat command).
// msa1 and msa2 must have same sequence names, but possibly
// in a different order.
// msaCat is the combined alignment produce by appending
// sequences in msa2 to sequences in msa1.
void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat)
	{
	const unsigned uSeqCount = msa1.GetSeqCount();

	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	const unsigned uColCountCat = uColCount1 + uColCount2;

	msaCat.SetSize(uSeqCount, uColCountCat);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		for (unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex)
			{
			const char c = msa1.GetChar(uSeqIndex, uColIndex);
			msaCat.SetChar(uSeqIndex, uColIndex, c);
			}

		const char *ptrSeqName = msa1.GetSeqName(uSeqIndex);
		unsigned uSeqIndex2;
		msaCat.SetSeqName(uSeqIndex, ptrSeqName);
		bool bFound = msa2.GetSeqIndex(ptrSeqName, &uSeqIndex2);
		if (bFound)
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				{
				const char c = msa2.GetChar(uSeqIndex2, uColIndex);
				msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
				}
			}
		else
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');
			}
		}
	}
コード例 #24
0
ファイル: refinesubfams.cpp プロジェクト: ggrekhov/ugene
bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)
	{
    MuscleContext *ctx = getMuscleContext();
    CLUSTER &g_Cluster2 = ctx->params.g_Cluster2;
    DISTANCE &g_Distance2 =  ctx->params.g_Distance2;
    ROOT &g_Root2 = ctx->params.g_Root2;
    bool &g_bAnchors = ctx->params.g_bAnchors;

	const unsigned uSeqCount = msa.GetSeqCount();
	if (uSeqCount < 3)
		return false;

	const double dMaxHeight = 0.6;
	const unsigned uMaxSubfamCount = 16;
    //const unsigned uNodeCount = tree.GetNodeCount();

	unsigned *Subfams;
	unsigned uSubfamCount;
	GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);
	assert(uSubfamCount <= uSeqCount);

	if (ctx->params.g_bVerbose)
		LogSubfams(tree, Subfams, uSubfamCount);

	MSA *SubfamMSAs = new MSA[uSubfamCount];
	unsigned *Leaves = new unsigned[uSeqCount];
	unsigned *Ids = new unsigned[uSeqCount];

	bool bAnyChanges = false;
	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uSubfam = Subfams[uSubfamIndex];
		unsigned uLeafCount;
		GetLeaves(tree, uSubfam, Leaves, &uLeafCount);
		assert(uLeafCount <= uSeqCount);

		LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);

		MSA &msaSubfam = SubfamMSAs[uSubfamIndex];
		MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);
		DeleteGappedCols(msaSubfam);

#if	TRACE
		Log("Subfam %u MSA=\n", uSubfamIndex);
		msaSubfam.LogMe();
#endif

		if (msaSubfam.GetSeqCount() <= 2)
			continue;

	// TODO /////////////////////////////////////////
	// Try using existing tree, may actually hurt to
	// re-estimate, may also be a waste of CPU & mem.
	/////////////////////////////////////////////////
		Tree SubfamTree;
		TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);

		bool bAnyChangesThisSubfam;
		if (g_bAnchors)
			bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);
		else
			bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);
#if	TRACE
		Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);
#endif
		if (bAnyChangesThisSubfam)
			bAnyChanges = true;
		}

	if (bAnyChanges)
		ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);

	delete[] Leaves;
	delete[] Subfams;
	delete[] SubfamMSAs;

	return bAnyChanges;
	}
コード例 #25
0
ファイル: refinesubfams.cpp プロジェクト: ggrekhov/ugene
static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
  unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)
	{
	const unsigned uNodeCount = tree.GetNodeCount();

	bool *Ready = new bool[uNodeCount];
	MSA **MSAs = new MSA *[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		Ready[uNodeIndex] = false;
		MSAs[uNodeIndex] = 0;
		}

	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uNodeIndex = Subfams[uSubfamIndex];
		Ready[uNodeIndex] = true;
		MSA *ptrMSA = new MSA;
	// TODO: Wasteful copy, needs re-design
		ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);
		MSAs[uNodeIndex] = ptrMSA;
		}

	for (unsigned uNodeIndex = tree.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNodeIndex;
	  uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))
		{
		if (tree.IsLeaf(uNodeIndex))
			continue;

		unsigned uRight = tree.GetRight(uNodeIndex);
		unsigned uLeft = tree.GetLeft(uNodeIndex);
		if (!Ready[uRight] || !Ready[uLeft])
			continue;

		MSA *ptrLeft = MSAs[uLeft];
		MSA *ptrRight = MSAs[uRight];
		assert(ptrLeft != 0 && ptrRight != 0);

		MSA *ptrParent = new MSA;

		PWPath Path;
		AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);

		MSAs[uNodeIndex] = ptrParent;
		Ready[uNodeIndex] = true;
		Ready[uLeft] = false;
		Ready[uRight] = false;

		delete MSAs[uLeft];
		delete MSAs[uRight];
		MSAs[uLeft] = 0;
		MSAs[uRight] = 0;
		}

#if	DEBUG
	{
	unsigned uReadyCount = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (Ready[uNodeIndex])
			{
			assert(tree.IsRoot(uNodeIndex));
			++uReadyCount;
			assert(0 != MSAs[uNodeIndex]);
			}
		else
			assert(0 == MSAs[uNodeIndex]);
		}
	assert(1 == uReadyCount);
	}
#endif

	const unsigned uRoot = tree.GetRootNodeIndex();
	MSA *ptrRootAlignment = MSAs[uRoot];

	msa.Copy(*ptrRootAlignment);

	delete ptrRootAlignment;
    delete[] Ready;

#if	TRACE
	Log("After refine subfamilies, root alignment=\n");
	msa.LogMe();
#endif
	}
コード例 #26
0
ファイル: makerootmsa.cpp プロジェクト: Unode/ext_apps
void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
  MSA &a)
	{
#if	TRACE
	Log("MakeRootMSA Tree=");
	GuideTree.LogMe();
#endif
	const unsigned uSeqCount = v.GetSeqCount();
	unsigned uColCount = uInsane;
	unsigned uSeqIndex = 0;
	const unsigned uTreeNodeCount = GuideTree.GetNodeCount();
	const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;
	const unsigned uRootColCount = RootPath.GetEdgeCount();
	const unsigned uEstringSize = uRootColCount + 1;
	short *Estring1 = new short[uEstringSize];
	short *Estring2 = new short[uEstringSize];
	SetProgressDesc("Root alignment");

	unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);
	do
		{
		Progress(uSeqIndex, uSeqCount);

		unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
		const Seq &s = *(v[uId]);

		Seq sRootE;
		short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
		  Estring1, Estring2);
		Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);

#if	VALIDATE
		Seq sRoot;
		MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);
		if (!sRoot.Eq(sRootE))
			{
			Log("sRoot=");
			sRoot.LogMe();
			Log("sRootE=");
			sRootE.LogMe();
			Quit("Root seqs differ");
			}
#if	TRACE
		Log("MakeRootSeq=\n");
		sRoot.LogMe();
#endif
#endif

		if (uInsane == uColCount)
			{
			uColCount = sRootE.Length();
			a.SetSize(uSeqCount, uColCount);
			}
		else
			{
			assert(uColCount == sRootE.Length());
			}
		a.SetSeqName(uSeqIndex, s.GetName());
		a.SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);
		++uSeqIndex;

		uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);

	delete[] Estring1;
	delete[] Estring2;

	ProgressStepsDone();
	assert(uSeqIndex == uSeqCount);
	}
コード例 #27
0
ファイル: refine.cpp プロジェクト: Wyss/mauve-py
void Refine()
	{
	SetOutputFileName(g_pstrOutFileName.get());
	SetInputFileName(g_pstrInFileName.get());
	SetStartTime();

	SetMaxIters(g_uMaxIters.get());
	SetSeqWeightMethod(g_SeqWeight1.get());

	TextFile fileIn(g_pstrInFileName.get());
	MSA msa;
	msa.FromFile(fileIn);

	const unsigned uSeqCount = msa.GetSeqCount();
	if (0 == uSeqCount)
		Quit("No sequences in input file");

	ALPHA Alpha = ALPHA_Undefined;
	switch (g_SeqType.get())
		{
	case SEQTYPE_Auto:
		Alpha = msa.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid SeqType");
		}
	SetAlpha(Alpha);
	msa.FixAlpha();

	SetPPScore();
	if (ALPHA_DNA == Alpha || ALPHA_RNA == Alpha)
		SetPPScore(PPSCORE_SPN);

	MSA::SetIdCount(uSeqCount);

// Initialize sequence ids.
// From this point on, ids must somehow propogate from here.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		msa.SetSeqId(uSeqIndex, uSeqIndex);
	SetMuscleInputMSA(msa);

	Tree GuideTree;
	TreeFromMSA(msa, GuideTree, g_Cluster2.get(), g_Distance2.get(), g_Root2.get());
	SetMuscleTree(GuideTree);

	if (g_bAnchors.get())
		RefineVert(msa, GuideTree, g_uMaxIters.get());
	else
		RefineHoriz(msa, GuideTree, g_uMaxIters.get(), false, false);

	ValidateMuscleIds(msa);
	ValidateMuscleIds(GuideTree);

//	TextFile fileOut(g_pstrOutFileName.get(), true);
//	msa.ToFile(fileOut);
	MuscleOutput(msa);
	}
コード例 #28
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])
	{
#if	TRACE
	Log("==================ObjScoreSP==============\n");
	Log("msa=\n");
	msa.LogMe();
#endif
	g_SPScoreLetters = 0;
	g_SPScoreGaps = 0;

	if (0 != MatchScore)
		{
		const unsigned uColCount = msa.GetColCount();
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			MatchScore[uColIndex] = 0;
		}

	const unsigned uSeqCount = msa.GetSeqCount();
	SCORE scoreTotal = 0;
	unsigned uPairCount = 0;
#if	TRACE
	Log("Seq1  Seq2     wt1     wt2    Letters         Gaps  Unwt.Score    Wt.Score       Total\n");
	Log("----  ----  ------  ------  ----------  ----------  ----------  ----------  ----------\n");
#endif
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
			{
			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
			const WEIGHT w = w1*w2;

			SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);
			SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);
			SCORE scorePair = scoreLetters + scoreGaps;
			++uPairCount;

			scoreTotal += w*scorePair;

			g_SPScoreLetters += w*scoreLetters;
			g_SPScoreGaps += w*scoreGaps;
#if	TRACE
			Log("%4d  %4d  %6.3f  %6.3f  %10.2f  %10.2f  %10.2f  %10.2f  %10.2f >%s >%s\n",
			  uSeqIndex1,
			  uSeqIndex2,
			  w1,
			  w2,
			  scoreLetters,
			  scoreGaps,
			  scorePair,
			  scorePair*w1*w2,
			  scoreTotal,
			  msa.GetSeqName(uSeqIndex1),
			  msa.GetSeqName(uSeqIndex2));
#endif
			}
		}
#if	TEST_SPFAST
	{
	SCORE f = ObjScoreSPFast(msa);
	Log("Fast  = %.6g\n", f);
	Log("Brute = %.6g\n", scoreTotal);
	if (BTEq(f, scoreTotal))
		Log("Agree\n");
	else
		Log("** DISAGREE **\n");
	}
#endif
//	return scoreTotal / uPairCount;
	return scoreTotal;
	}
コード例 #29
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairGaps, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairGaps\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		if (bGap1 && bGap2)
			continue;

		if (bGap1)
			{
			if (!bGapping1)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 1 col %d\n", uColIndex);
#endif
				if (uColIndex == uColStart)
					scoreGaps += TermGapScore(true);
				else
					scoreGaps += g_scoreGapOpen;
				bGapping1 = true;
				}
			else
				scoreGaps += g_scoreGapExtend;
			continue;
			}

		else if (bGap2)
			{
			if (!bGapping2)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 2 col %d\n", uColIndex);
#endif
				if (uColIndex == uColStart)
					scoreGaps += TermGapScore(true);
				else
					scoreGaps += g_scoreGapOpen;
				bGapping2 = true;
				}
			else
				scoreGaps += g_scoreGapExtend;
			continue;
			}

		bGapping1 = false;
		bGapping2 = false;
		}

	if (bGapping1 || bGapping2)
		{
		scoreGaps -= g_scoreGapOpen;
		scoreGaps += TermGapScore(true);
		}
	return scoreGaps;
	}
コード例 #30
0
ファイル: diffobjscore.cpp プロジェクト: ggrekhov/ugene
SCORE DiffObjScore(
  const MSA &msa1, const PWPath &Path1, const unsigned Edges1[], unsigned uEdgeCount1, 
  const MSA &msa2, const PWPath &Path2, const unsigned Edges2[], unsigned uEdgeCount2)
	{
#if	TRACE
	{
	Log("============DiffObjScore===========\n");
	Log("msa1:\n");
	msa1.LogMe();
	Log("\n");
	Log("Cols1: ");
	for (unsigned i = 0; i < uEdgeCount1; ++i)
		Log(" %u", Edges1[i]);
	Log("\n\n");
	Log("msa2:\n");
	msa2.LogMe();
	Log("Cols2: ");
	for (unsigned i = 0; i < uEdgeCount2; ++i)
		Log(" %u", Edges2[i]);
	Log("\n\n");
	}
#endif

#if	COMPARE_3_52
	extern SCORE g_SPScoreLetters;
	extern SCORE g_SPScoreGaps;
	SCORE SP1 = ObjScoreSP(msa1);
	SCORE SPLetters1 = g_SPScoreLetters;
	SCORE SPGaps1 = g_SPScoreGaps;

	SCORE SP2 = ObjScoreSP(msa2);
	SCORE SPLetters2 = g_SPScoreLetters;
	SCORE SPGaps2 = g_SPScoreGaps;
	SCORE SPDiffLetters = SPLetters2 - SPLetters1;
	SCORE SPDiffGaps = SPGaps2 - SPGaps1;
	SCORE SPDiff = SPDiffLetters + SPDiffGaps;
#endif

	SCORE Letters1 = ScoreLetters(msa1, Edges1, uEdgeCount1);
	SCORE Letters2 = ScoreLetters(msa2, Edges2, uEdgeCount2);

	SCORE Gaps1 = ScoreGaps(msa1, Edges1, uEdgeCount1);
	SCORE Gaps2 = ScoreGaps(msa2, Edges2, uEdgeCount2);

	SCORE DiffLetters = Letters2 - Letters1;
	SCORE DiffGaps = Gaps2 - Gaps1;
	SCORE Diff = DiffLetters + DiffGaps;

#if	COMPARE_3_52
	Log("ObjScoreSP    Letters1=%.4g  Letters2=%.4g  DiffLetters=%.4g\n",
	  SPLetters1, SPLetters2, SPDiffLetters);

	Log("DiffObjScore  Letters1=%.4g  Letters2=%.4g  DiffLetters=%.4g\n",
	  Letters1, Letters2, DiffLetters);

	Log("ObjScoreSP    Gaps1=%.4g  Gaps2=%.4g  DiffGaps=%.4g\n",
	  SPGaps1, SPGaps2, SPDiffGaps);

	Log("DiffObjScore  Gaps1=%.4g  Gaps2=%.4g  DiffGaps=%.4g\n",
	  Gaps1, Gaps2, DiffGaps);

	Log("SP diff=%.4g DiffObjScore Diff=%.4g\n", SPDiff, Diff);
#endif

	return Diff;
	}