コード例 #1
0
ファイル: cons.cpp プロジェクト: Wyss/mauve-py
double MSA::GetCons(unsigned uColIndex) const
	{
	unsigned Counts[MAX_ALPHA];
	for (unsigned uLetter = 0; uLetter < g_AlphaSize.get(); ++uLetter)
		Counts[uLetter] = 0;

	unsigned uMaxCount = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
		{
		if (IsGap(uSeqIndex, uColIndex))
			continue;
		char c = GetChar(uSeqIndex, uColIndex);
		c = toupper(c);
		if ('X' == c || 'B' == c || 'Z' == c)
			continue;
		unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
		unsigned uCount = Counts[uLetter] + 1;
		if (uCount > uMaxCount)
			uMaxCount = uCount;
		Counts[uLetter] = uCount;
		}

// Cons is undefined for all-gap column
	if (0 == uMaxCount)
		{
//		assert(false);
		return 1;
		}

	double dCons = (double) uMaxCount / (double) GetSeqCount();
	assert(dCons > 0 && dCons <= 1);
	return dCons;
	}
コード例 #2
0
ファイル: clwwt.cpp プロジェクト: bigmuscle/bigmuscle
// Assigns a weight to every sequence from the weights that
// CalcClustalWWeights computes for the given tree, then rescales the weights
// with NormalizeWeights(1.0).
// Each leaf of the tree is mapped to a sequence through its leaf id
// (GetLeafId -> GetSeqIndex).
void MSA::SetClustalWWeights(const Tree &tree)
	{
	const unsigned uSeqCount = GetSeqCount();
	const unsigned uLeafCount = tree.GetLeafCount();

// The loop below reads one weight per leaf, so the buffer must have room for
// uLeafCount entries even if the tree has more leaves than the alignment
// has sequences.
	const unsigned uWeightCount = (uLeafCount > uSeqCount) ? uLeafCount : uSeqCount;
	WEIGHT *Weights = new WEIGHT[uWeightCount];

	CalcClustalWWeights(tree, Weights);

	for (unsigned n = 0; n < uLeafCount; ++n)
		{
		const WEIGHT w = Weights[n];
		const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
		const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
		const unsigned uSeqIndex = GetSeqIndex(uId);
#if	DEBUG
	// Debug builds verify that the id mapping pairs identically named entries.
		if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))
			Quit("MSA::SetClustalWWeights: names don't match");
#endif
		SetSeqWeight(uSeqIndex, w);
		}
	NormalizeWeights((WEIGHT) 1.0);

	delete[] Weights;
	}
コード例 #3
0
ファイル: fasta.cpp プロジェクト: Unode/ext_apps
// Writes the alignment to File in FASTA format: for every sequence a
// ">name" header line followed by the sequence characters, wrapped at
// FASTA_BLOCK characters per line.
// An alignment is expected to have at least one column (asserted); when
// asserts are compiled out, a zero-column alignment produces header lines only.
void MSA::ToFASTAFile(TextFile &File) const
	{
	const unsigned uColCount = GetColCount();
	assert(uColCount > 0);
// Guard the subtraction: with unsigned arithmetic 0 - 1 wraps around and
// would yield a huge line count followed by out-of-range column reads.
	const unsigned uLinesPerSeq =
	  (0 == uColCount) ? 0 : (uColCount - 1)/FASTA_BLOCK + 1;
	const unsigned uSeqCount = GetSeqCount();

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		File.PutString(">");
		File.PutString(GetSeqName(uSeqIndex));
		File.PutString("\n");

	// n is the next column to emit; it runs across all lines of this sequence.
		unsigned n = 0;
		for (unsigned uLine = 0; uLine < uLinesPerSeq; ++uLine)
			{
		// Columns still to be written, capped at one full line.
			unsigned uLetters = uColCount - uLine*FASTA_BLOCK;
			if (uLetters > FASTA_BLOCK)
				uLetters = FASTA_BLOCK;
			for (unsigned i = 0; i < uLetters; ++i)
				{
				char c = GetChar(uSeqIndex, n);
				File.PutChar(c);
				++n;
				}
			File.PutChar('\n');
			}
		}
	}
コード例 #4
0
ファイル: henikoffweight.cpp プロジェクト: Wyss/mauve-py
// Fills m_Weights with position-based sequence weights.
// Alignments of 0, 1 or 2 sequences are handled directly (nothing, 1.0, or
// 0.5 each). Otherwise the weights are accumulated column by column with
// CalcHenikoffWeightsCol, sequences for which IsGapSeq is true get weight 0,
// and the result is passed to Normalize.
void MSA::SetHenikoffWeights() const
	{
	const unsigned uColCount = GetColCount();
	const unsigned uSeqCount = GetSeqCount();

	switch (uSeqCount)
		{
	case 0:
		return;

	case 1:
		m_Weights[0] = (WEIGHT) 1.0;
		return;

	case 2:
		m_Weights[0] = (WEIGHT) 0.5;
		m_Weights[1] = (WEIGHT) 0.5;
		return;

	default:
		break;
		}

// Start from zero; every column adds its contribution.
	for (unsigned i = 0; i != uSeqCount; ++i)
		m_Weights[i] = 0.0;

	for (unsigned uCol = 0; uCol != uColCount; ++uCol)
		CalcHenikoffWeightsCol(uCol);

// Sequences consisting only of gaps must not carry any weight.
	for (unsigned i = 0; i != uSeqCount; ++i)
		{
		if (!IsGapSeq(i))
			continue;
		m_Weights[i] = 0.0;
		}

	Normalize(m_Weights, uSeqCount);
	}
コード例 #5
0
ファイル: henikoffweight.cpp プロジェクト: Wyss/mauve-py
void MSA::CalcHenikoffWeightsCol(unsigned uColIndex) const
	{
	const unsigned uSeqCount = GetSeqCount();

// Compute letter counts in this column
	unsigned uLetterCount[MAX_ALPHA];
	memset(uLetterCount, 0, sizeof(uLetterCount));
	unsigned uDifferentLetterCount = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
		if (uLetter >= 20)
			continue;
		unsigned uNewCount = uLetterCount[uLetter] + 1;
		uLetterCount[uLetter] = uNewCount;
		if (1 == uNewCount)
			++uDifferentLetterCount;
		}

// Compute weight contributions
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
		if (uLetter >= 20)
			continue;
		const unsigned uCount = uLetterCount[uLetter];
		unsigned uDenom = uCount*uDifferentLetterCount;
		if (uDenom == 0)
			continue;
		m_Weights[uSeqIndex] += (WEIGHT) (1.0/uDenom);
		}
	}
コード例 #6
0
ファイル: seqvect.cpp プロジェクト: cran/muscle
// Returns a reference to the first sequence whose id equals uId.
// When no sequence has that id, Quit is called with an error message.
Seq &SeqVect::GetSeqById(unsigned uId)
	{
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned i = 0; i < uSeqCount; ++i)
		{
		if (GetSeqId(i) == uId)
			return GetSeq(i);
		}
// The message now names this function correctly and prints the unsigned id
// with the matching conversion.
	Quit("SeqVect::GetSeqById(%u): not found", uId);
// NOTE(review): presumably Quit does not return, so this statement exists only
// to give every path a return value; it must never actually execute.
	return (Seq &) *((Seq *) 0);
	}
コード例 #7
0
ファイル: seqvect.cpp プロジェクト: cran/muscle
unsigned SeqVect::GetSeqIdFromName(const char *Name) const
	{
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned i = 0; i < uSeqCount; ++i)
		{
		if (!strcmp(Name, GetSeqName(i)))
			return GetSeqId(i);
		}
	Quit("SeqVect::GetSeqIdFromName(%s): not found", Name);
	return 0;
	}
コード例 #8
0
ファイル: setgscweights.cpp プロジェクト: bigmuscle/bigmuscle
void MSA::ListWeights() const
	{
	const unsigned uSeqCount = GetSeqCount();
	Log("Weights:\n");
	WEIGHT wTotal = 0;
	for (unsigned n = 0; n < uSeqCount; ++n)
		{
		wTotal += GetSeqWeight(n);
		Log("%6.3f %s\n", GetSeqWeight(n), GetSeqName(n));
		}
	Log("Total weights = %6.3f, should be 1.0\n", wTotal);
	}
コード例 #9
0
ファイル: physeq.cpp プロジェクト: ggrekhov/ugene
// Writes the alignment to File in interleaved PHYLIP format.
// The first line holds the sequence and column counts. The first block
// starts every row with the sequence name (cut to 10 characters, passed
// through FixName and padded to width 10) and holds BLOCKSIZE - 10 columns;
// later blocks hold BLOCKSIZE columns. Columns are written in groups of 10
// separated by a blank, letters are upper-cased, and blocks are separated
// by an empty line.
// Only the count line is written when the alignment has no columns or no
// sequences.
void MSA::ToPhyInterleavedFile(TextFile &File) const
	{
	const unsigned SeqCount = GetSeqCount();
	const unsigned ColCount = GetColCount();

// Both counts are unsigned, so print them with %u.
	File.PutFormat("%u %u\n", SeqCount, ColCount);

// Without sequences the block loop below would never advance Col and would
// therefore never terminate, so an alignment without rows is treated like
// one without columns.
	if (0 == ColCount || 0 == SeqCount)
		return;

	unsigned Col = 0;
	for (;;)
		{
		const unsigned ColBlockStart = Col;
	// The first block is narrower because the name takes up 10 characters.
		const unsigned MaxCols = (ColBlockStart == 0) ? (BLOCKSIZE - 10) : BLOCKSIZE;

		for (unsigned Seq = 0; Seq < SeqCount; ++Seq)
			{
			if (0 == ColBlockStart)
				{
				char Name[11];
				const char *ptrName = GetSeqName(Seq);
				size_t n = strlen(ptrName);
				if (n > 10)
					n = 10;
				memcpy(Name, ptrName, n);
				Name[n] = 0;
				FixName(Name);
				File.PutFormat("%-10.10s", Name);
				}

		// Every row of the block restarts at the block's first column.
			Col = ColBlockStart;
			for (unsigned ColsThisBlock = 0; ColsThisBlock < MaxCols; ++ColsThisBlock)
				{
				if (Col == ColCount)
					break;
			// Blank before each group of 10, except at the very start of a
			// row in blocks that carry no name.
				if (ColsThisBlock%10 == 0 && (0 == ColBlockStart || ColsThisBlock > 0))
					File.PutChar(' ');
				char c = GetChar(Seq, Col);
			// The <cctype> functions require a value representable as
			// unsigned char, hence the casts.
				if (isalpha((unsigned char) c))
					c = (char) toupper((unsigned char) c);
				File.PutChar(c);
				++Col;
				}
			File.PutChar('\n');
			}
		if (Col == ColCount)
			break;
		File.PutChar('\n');
		}
	}
コード例 #10
0
ファイル: seqvect.cpp プロジェクト: cran/muscle
// Guesses the alphabet from the first CHAR_COUNT non-gap characters, taken
// sequence by sequence in order.
// Returns ALPHA_DNA when at least MIN_NUCLEO_PCT percent of the sampled
// characters satisfy IsDNA, otherwise ALPHA_RNA when at least that percentage
// satisfy IsRNA, otherwise ALPHA_Amino. An empty vector, or one without any
// non-gap character, yields ALPHA_Amino.
ALPHA SeqVect::GuessAlpha() const
	{
	const unsigned CHAR_COUNT = 100;
	const unsigned MIN_NUCLEO_PCT = 95;

	const unsigned uSeqCount = GetSeqCount();
	if (0 == uSeqCount)
		return ALPHA_Amino;

	unsigned uDNA = 0;
	unsigned uRNA = 0;
	unsigned uSampled = 0;
	for (unsigned uSeq = 0; uSeq < uSeqCount && uSampled < CHAR_COUNT; ++uSeq)
		{
		const Seq *ptrCurrent = &GetSeq(uSeq);
		const unsigned uLength =
		  (0 == uSeq) ? GetSeqLength(0) : ptrCurrent->Length();

		for (unsigned uPos = 0; uPos < uLength; ++uPos)
			{
			const char c = ptrCurrent->at(uPos);
			if (IsGapChar(c))
				continue;

			if (IsDNA(c))
				++uDNA;
			if (IsRNA(c))
				++uRNA;

		// Stop as soon as the sample is complete.
			++uSampled;
			if (uSampled >= CHAR_COUNT)
				break;
			}
		}

// Nothing to judge by: fall back to amino acids.
	if (0 == uSampled)
		return ALPHA_Amino;

	const unsigned uDNAPct = (uDNA*100)/uSampled;
	if (uDNAPct >= MIN_NUCLEO_PCT)
		return ALPHA_DNA;

	const unsigned uRNAPct = (uRNA*100)/uSampled;
	if (uRNAPct >= MIN_NUCLEO_PCT)
		return ALPHA_RNA;

	return ALPHA_Amino;
	}
コード例 #11
0
ファイル: cons.cpp プロジェクト: Wyss/mauve-py
// Returns the mean of GetCons over all columns for which IsGapColumn is
// false. The alignment is expected to contain at least one sequence and at
// least one such column (both asserted).
double MSA::GetAvgCons() const
	{
	assert(GetSeqCount() > 0);

	double dTotal = 0;
	unsigned uCounted = 0;
	for (unsigned uCol = 0; uCol < GetColCount(); ++uCol)
		{
		if (IsGapColumn(uCol))
			continue;
		dTotal += GetCons(uCol);
		++uCounted;
		}

	assert(uCounted > 0);
	const double dMean = dTotal / uCounted;
	assert(dMean > 0 && dMean <= 1);
	return dMean;
	}
コード例 #12
0
ファイル: setblosumweights.cpp プロジェクト: Unode/ext_apps
// Clusters the sequences by pairwise identity and returns the group count,
// i.e. the effective number of distinctly different sequences.
// The distance between two sequences is 1 minus the value returned by
// GetPctIdentityPair; the cluster tree is built into BlosumCluster.
unsigned MSA::CalcBLOSUMWeights(ClusterTree &BlosumCluster) const
	{
// Fill the upper triangle of the distance matrix.
	DistFunc DF;
	unsigned uCount = GetSeqCount();
	DF.SetCount(uCount);
	for (unsigned uFirst = 0; uFirst < uCount; ++uFirst)
		{
		for (unsigned uSecond = uFirst + 1; uSecond < uCount; ++uSecond)
			{
			const double dIdentity = GetPctIdentityPair(uFirst, uSecond);
			assert(dIdentity >= 0.0 && dIdentity <= 1.0);
			const float fDistance = (float) (1.0 - dIdentity);
			DF.SetDist(uFirst, uSecond, fDistance);
			}
		}

// Build the cluster tree from the distances.
	BlosumCluster.Create(DF);

// The node weighting yields HMMer's "effective sequence count".
	return SetBLOSUMNodeWeight(BlosumCluster.GetRoot(), 1.0 - BLOSUM_DIST);
	}
コード例 #13
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
// Computes weighted per-letter counts and weighted gap statistics for
// column uColIndex of the alignment (variant with composite-vector
// modifications, see the marked section in the first loop).
//
//   uColIndex       column to examine
//   bNormalize      when true and wTotal > 0, the first g_AlphaSize entries
//                   of fcCounts are divided by wTotal
//   fcCounts        output; the first g_AlphaSize entries are zeroed on entry
//   ptrfcGapStart   receives the summed weight of sequences with a gap here
//                   that is not preceded by a gap (or that is in column 0)
//   ptrfcGapEnd     receives the summed weight of sequences with a gap here
//                   that is not followed by a gap (or that is in the last column)
//   ptrfcGapExtend  receives the summed weight of sequences with a gap in the
//                   previous, this and the next column (interior columns only)
//   ptrfOcc         receives 1.0 minus the summed weight of sequences with a
//                   gap in this column
//   ptrfcLL/LG/GL/GG receive the summed weights by (previous, current) state,
//                   L = letter, G = gap; column 0 counts as preceded by a letter
void MSA::GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,
  FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,
  FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc,
  FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const
	{
	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();
	const char* seqName;
	memset(fcCounts, 0, g_AlphaSize*sizeof(FCOUNT));
	WEIGHT wTotal = 0;
	FCOUNT fGap = 0;
	// Pass 1: per-sequence contribution to the letter counts.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const WEIGHT w = GetSeqWeight(uSeqIndex);
		if (IsGap(uSeqIndex, uColIndex))
			{
			// Gaps only contribute to the occupancy figure.
			fGap += w;
			continue;
			}
		else if (IsWildcard(uSeqIndex, uColIndex))
			{
			// Wildcards spread their weight over the letters they may stand
			// for; they do not add to wTotal.
			const unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
			switch (g_Alpha)
				{
			case ALPHA_Amino:
				switch (uLetter)
					{
				case AX_B:		// D or N
					fcCounts[AX_D] += w/2;
					fcCounts[AX_N] += w/2;
					break;
				case AX_Z:		// E or Q
					fcCounts[AX_E] += w/2;
					fcCounts[AX_Q] += w/2;
					break;
				default:		// any
					{
					const FCOUNT f = w/20;
					for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
						fcCounts[uLetter] += f;
					break;
					}
					}
				break;

			case ALPHA_DNA:
			case ALPHA_RNA:
				switch (uLetter)
					{
				case AX_R:	// G or A
					fcCounts[NX_G] += w/2;
					fcCounts[NX_A] += w/2;
					break;
				case AX_Y:	// C or T/U
					fcCounts[NX_C] += w/2;
					fcCounts[NX_T] += w/2;
					break;
				default:	// any
					// NOTE(review): the weight is divided by 20 but spread
					// over 4 letters - confirm whether w/4 was intended.
					const FCOUNT f = w/20;
					for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
						fcCounts[uLetter] += f;
					break;
					}
				break;

			default:
				Quit("Alphabet %d not supported", g_Alpha);
				}
			continue;
			}
		// NOTE(review): uLetter is not used by the modified code below; the
		// call is kept as is.
		unsigned uLetter = GetLetter(uSeqIndex, uColIndex);

		//BEGIN MODIFICATIONS TO MUSCLE

		// Count the characters other than '-' that this sequence has in the
		// columns before uColIndex.
		int original=0;
		for(unsigned i=0; i<uColIndex; i++){
			if (i >= this->GetColCount()){break;}
			++original;
			char c = GetChar(uSeqIndex, i);
			if(c== '-'){
				original--;
			}
		}
		// The sequence name is parsed as an integer and used as an index into
		// the composite vector.
		// NOTE(review): presumably names are numeric labels - confirm; atoi
		// yields 0 for a non-numeric name.
		seqName = this->GetSeqName(uSeqIndex);
		int compositeVectPosition;
		compositeVectPosition = atoi(seqName);
		// NOTE(review): CV and C below are declared by value, which looks like
		// a copy per sequence - confirm the cost is acceptable.
		CompositeVect CV = *CVLocation;

		Composite* CVL = CV[compositeVectPosition];
		Composite C = *CVL;
		
		// Entries 0..20 of fcCounts are assigned (not accumulated) from row
		// 'original' of the composite, scaled by the sequence weight; wTotal
		// ends up as the last of these products.
		// NOTE(review): 21 entries are written while only g_AlphaSize entries
		// were cleared above - confirm fcCounts is large enough, and confirm
		// that overwriting rather than accumulating is intended.
		for(int j=0; j<21; j++){
			fcCounts[j] = w*C[original][j];
			wTotal = w*C[original][j];
		}

		//ORIGINAL MUSCLE LINE WAS:
		//fcCounts[uLetter] += w;
		//wTotal += w;
		//END MODIFICATIONS TO MUSCLE

		}
	*ptrfOcc = (float) (1.0 - fGap);

	if (bNormalize && wTotal > 0)
		{
		// A total noticeably above 1 is treated as a fatal error.
		if (wTotal > 1.001)
			Quit("wTotal=%g\n", wTotal);
		for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
			fcCounts[uLetter] /= wTotal;
//		AssertNormalized(fcCounts);
		}

	// Gap starts: a gap here with no gap in the previous column; in column 0
	// every gap counts as a start.
	FCOUNT fcStartCount = 0;
	if (uColIndex == 0)
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex))
				fcStartCount += GetSeqWeight(uSeqIndex);
		}
	else
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex - 1))
				fcStartCount += GetSeqWeight(uSeqIndex);
		}

	// Gap ends: a gap here with no gap in the next column; in the last
	// column every gap counts as an end.
	FCOUNT fcEndCount = 0;
	if (uColCount - 1 == uColIndex)
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex))
				fcEndCount += GetSeqWeight(uSeqIndex);
		}
	else
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex + 1))
				fcEndCount += GetSeqWeight(uSeqIndex);
		}

	// Transition weights between the previous and the current column
	// (L = letter, G = gap; first letter is the previous column's state).
	FCOUNT LL = 0;
	FCOUNT LG = 0;
	FCOUNT GL = 0;
	FCOUNT GG = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		WEIGHT w = GetSeqWeight(uSeqIndex);
		bool bLetterHere = !IsGap(uSeqIndex, uColIndex);
		// Column 0 is treated as if a letter preceded it.
		bool bLetterPrev = (uColIndex == 0 || !IsGap(uSeqIndex, uColIndex - 1));
		if (bLetterHere)
			{
			if (bLetterPrev)
				LL += w;
			else
				GL += w;
			}
		else
			{
			if (bLetterPrev)
				LG += w;
			else
				GG += w;
			}
		}

	// Gap extensions: only interior columns, gap on both sides and here.
	FCOUNT fcExtendCount = 0;
	if (uColIndex > 0 && uColIndex < GetColCount() - 1)
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && IsGap(uSeqIndex, uColIndex - 1) &&
			  IsGap(uSeqIndex, uColIndex + 1))
				fcExtendCount += GetSeqWeight(uSeqIndex);

	*ptrfcLL = LL;
	*ptrfcLG = LG;
	*ptrfcGL = GL;
	*ptrfcGG = GG;
	*ptrfcGapStart = fcStartCount;
	*ptrfcGapEnd = fcEndCount;
	*ptrfcGapExtend = fcExtendCount;
	}
コード例 #14
0
ファイル: msa2.cpp プロジェクト: Unode/ext_apps
void MSA::GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,
  FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,
  FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc,
  FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const
	{
	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();

	memset(fcCounts, 0, g_AlphaSize*sizeof(FCOUNT));
	WEIGHT wTotal = 0;
	FCOUNT fGap = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const WEIGHT w = GetSeqWeight(uSeqIndex);
		if (IsGap(uSeqIndex, uColIndex))
			{
			fGap += w;
			continue;
			}
		else if (IsWildcard(uSeqIndex, uColIndex))
			{
			const unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
			switch (g_Alpha)
				{
			case ALPHA_Amino:
				switch (uLetter)
					{
				case AX_B:		// D or N
					fcCounts[AX_D] += w/2;
					fcCounts[AX_N] += w/2;
					break;
				case AX_Z:		// E or Q
					fcCounts[AX_E] += w/2;
					fcCounts[AX_Q] += w/2;
					break;
				default:		// any
					{
					const FCOUNT f = w/20;
					for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
						fcCounts[uLetter] += f;
					break;
					}
					}
				break;

			case ALPHA_DNA:
			case ALPHA_RNA:
				switch (uLetter)
					{
				case AX_R:	// G or A
					fcCounts[NX_G] += w/2;
					fcCounts[NX_A] += w/2;
					break;
				case AX_Y:	// C or T/U
					fcCounts[NX_C] += w/2;
					fcCounts[NX_T] += w/2;
					break;
				default:	// any
					const FCOUNT f = w/20;
					for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
						fcCounts[uLetter] += f;
					break;
					}
				break;

			default:
				Quit("Alphabet %d not supported", g_Alpha);
				}
			continue;
			}
		unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
		fcCounts[uLetter] += w;
		wTotal += w;
		}
	*ptrfOcc = (float) (1.0 - fGap);

	if (bNormalize && wTotal > 0)
		{
		if (wTotal > 1.001)
			Quit("wTotal=%g\n", wTotal);
		for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
			fcCounts[uLetter] /= wTotal;
//		AssertNormalized(fcCounts);
		}

	FCOUNT fcStartCount = 0;
	if (uColIndex == 0)
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex))
				fcStartCount += GetSeqWeight(uSeqIndex);
		}
	else
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex - 1))
				fcStartCount += GetSeqWeight(uSeqIndex);
		}

	FCOUNT fcEndCount = 0;
	if (uColCount - 1 == uColIndex)
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex))
				fcEndCount += GetSeqWeight(uSeqIndex);
		}
	else
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex + 1))
				fcEndCount += GetSeqWeight(uSeqIndex);
		}

	FCOUNT LL = 0;
	FCOUNT LG = 0;
	FCOUNT GL = 0;
	FCOUNT GG = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		WEIGHT w = GetSeqWeight(uSeqIndex);
		bool bLetterHere = !IsGap(uSeqIndex, uColIndex);
		bool bLetterPrev = (uColIndex == 0 || !IsGap(uSeqIndex, uColIndex - 1));
		if (bLetterHere)
			{
			if (bLetterPrev)
				LL += w;
			else
				GL += w;
			}
		else
			{
			if (bLetterPrev)
				LG += w;
			else
				GG += w;
			}
		}

	FCOUNT fcExtendCount = 0;
	if (uColIndex > 0 && uColIndex < GetColCount() - 1)
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			if (IsGap(uSeqIndex, uColIndex) && IsGap(uSeqIndex, uColIndex - 1) &&
			  IsGap(uSeqIndex, uColIndex + 1))
				fcExtendCount += GetSeqWeight(uSeqIndex);

	*ptrfcLL = LL;
	*ptrfcLG = LG;
	*ptrfcGL = GL;
	*ptrfcGG = GG;
	*ptrfcGapStart = fcStartCount;
	*ptrfcGapEnd = fcEndCount;
	*ptrfcGapExtend = fcExtendCount;
	}
コード例 #15
0
ファイル: aln.cpp プロジェクト: ggrekhov/ugene
// Writes the alignment to File in CLUSTAL (.aln) format.
// The header line depends on the g_bClwStrict parameter. The columns are
// written in blocks of uCharsPerLine; each block holds one row per sequence
// (name, six blanks, upper-cased characters) followed by a row of consensus
// characters from GetAlnConsensusChar.
// Names are cut at the first blank and padded or truncated to a common
// width, which is the longest such name clamped to [MIN_NAME, MAX_NAME].
// For an alignment without columns only the header is written.
void MSA::ToAlnFile(TextFile &File) const
	{
	if (getMuscleContext()->params.g_bClwStrict)
		File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n");
	else
		{
		File.PutString("MUSCLE ("
		  MUSCLE_MAJOR_VERSION "." MUSCLE_MINOR_VERSION ")"
		  " multiple sequence alignment\n");
		File.PutString("\n");
		}

// Determine the width of the name column.
	int iLongestNameLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
		{
		const char *ptrName = GetSeqName(uSeqIndex);
		const char *ptrBlank = strchr(ptrName, ' ');
		int iLength;
		if (0 != ptrBlank)
			iLength = (int) (ptrBlank - ptrName);
		else
			iLength = (int) strlen(ptrName);
		if (iLength > iLongestNameLength)
			iLongestNameLength = iLength;
		}
	if (iLongestNameLength > MAX_NAME)
		iLongestNameLength = MAX_NAME;
	if (iLongestNameLength < MIN_NAME)
		iLongestNameLength = MIN_NAME;

// Guard the subtraction: with unsigned arithmetic 0 - 1 wraps around, which
// would produce a huge block count and out-of-range column indexes.
	const unsigned uColCount = GetColCount();
	const unsigned uLineCount =
	  (0 == uColCount) ? 0 : (uColCount - 1)/uCharsPerLine + 1;
	for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex)
		{
		File.PutString("\n");
		unsigned uStartColIndex = uLineIndex*uCharsPerLine;
		unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1;
	// The last block may be shorter than a full line.
		if (uEndColIndex >= uColCount)
			uEndColIndex = uColCount - 1;
		char Name[MAX_NAME+1];
		for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
			{
			const char *ptrName = GetSeqName(uSeqIndex);
			const char *ptrBlank = strchr(ptrName, ' ');
			int iLength;
			if (0 != ptrBlank)
				iLength = (int) (ptrBlank - ptrName);
			else
				iLength = (int) strlen(ptrName);
			if (iLength > MAX_NAME)
				iLength = MAX_NAME;
		// Blank-fill first so that short names are padded to the common width.
			memset(Name, ' ', MAX_NAME);
			memcpy(Name, ptrName, iLength);
			Name[iLongestNameLength] = 0;

			File.PutFormat("%s      ", Name);
			for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
			  ++uColIndex)
				{
				const char c = GetChar(uSeqIndex, uColIndex);
			// toupper requires a value representable as unsigned char.
				File.PutFormat("%c", toupper((unsigned char) c));
				}
			File.PutString("\n");
			}

	// Consensus row: an all-blank name column followed by one consensus
	// character per column.
		memset(Name, ' ', MAX_NAME);
		Name[iLongestNameLength] = 0;
		File.PutFormat("%s      ", Name);
		for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex;
		  ++uColIndex)
			{
			const char c = GetAlnConsensusChar(*this, uColIndex);
			File.PutChar(c);
			}
		File.PutString("\n");
		}
	}