Esempio n. 1
0
bool Seq::HasGap() const
	{
	for (CharVect::const_iterator p = begin(); p != end(); ++p)
		{
		char c = *p;
		if (IsGapChar(c))
			return true;
		}
	return false;
	}
Esempio n. 2
0
// Perecent identity of a pair of sequences.
// Positions with one or both gapped are ignored.
double MSA::GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const
	{
	const unsigned uColCount = GetColCount();
	unsigned uPosCount = 0;
	unsigned uSameCount = 0;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		const char c1 = GetChar(uSeqIndex1, uColIndex);
		const char c2 = GetChar(uSeqIndex2, uColIndex);
		if (IsGapChar(c1) || IsGapChar(c2))
			continue;
		if (c1 == c2)
			++uSameCount;
		++uPosCount;
		}
	if (0 == uPosCount)
		return 0;
	return (double) uSameCount / (double) uPosCount;
	}
Esempio n. 3
0
unsigned Seq::GetUngappedLength() const
	{
	unsigned uUngappedLength = 0;
	for (CharVect::const_iterator p = begin(); p != end(); ++p)
		{
		char c = *p;
		if (!IsGapChar(c))
			++uUngappedLength;
		}
	return uUngappedLength;
	}
Esempio n. 4
0
void Seq::StripGapsAndWhitespace()
	{
	for (CharVect::iterator p = begin(); p != end(); )
		{
		char c = *p;
		if (isspace(c) || IsGapChar(c))
			erase(p);
		else
			++p;
		}
	}
Esempio n. 5
0
void Seq::StripGaps()
	{
	for (CharVect::iterator p = begin(); p != end(); )
		{
		char c = *p;
		if (IsGapChar(c))
			erase(p);
		else
			++p;
		}
	}
Esempio n. 6
0
bool Seq::EqIgnoreCase(const Seq &s) const
	{
	const unsigned n = Length();
	if (n != s.Length())
		return false;
	for (unsigned i = 0; i < n; ++i)
		{
		const char c1 = at(i);
		const char c2 = s.at(i);
		if (IsGapChar(c1))
			{
			if (!IsGapChar(c2))
				return false;
			}
		else
			{
			if (toupper(c1) != toupper(c2))
				return false;
			}
		}
	return true;
	}
Esempio n. 7
0
// Fill s with the ungapped characters of row uSeqIndex of msa,
// taken from columns uColFrom through uColTo (both inclusive).
// s is cleared first and receives the row's name and id.
static void SeqFromMSACols(const MSA &msa, unsigned uSeqIndex, unsigned uColFrom,
  unsigned uColTo, Seq &s)
	{
	s.Clear();
	s.SetName(msa.GetSeqName(uSeqIndex));
	s.SetId(msa.GetSeqId(uSeqIndex));

	unsigned uCol = uColFrom;
	while (uCol <= uColTo)
		{
		const char cCol = msa.GetChar(uSeqIndex, uCol);
		++uCol;
		if (IsGapChar(cCol))
			continue;
		s.AppendChar(cCol);
		}
	}
Esempio n. 8
0
// Store the ungapped characters of this sequence as row 0 of msa.
// msa is cleared first; non-gap characters are written to
// consecutive columns starting at 0 and the row is given this
// sequence's name.
// NOTE(review): msa is sized as 1 x 1 before columns beyond index 0
// are written; presumably MSA::SetChar grows the alignment on
// demand -- confirm.
void Seq::ExtractUngapped(MSA &msa) const
	{
	msa.Clear();
	const unsigned uLength = Length();
	msa.SetSize(1, 1);

	unsigned uOutCol = 0;
	for (unsigned uPos = 0; uPos != uLength; ++uPos)
		{
		const char cCur = at(uPos);
		if (IsGapChar(cCur))
			continue;
		msa.SetChar(0, uOutCol, cCur);
		++uOutCol;
		}
	msa.SetSeqName(0, m_ptrName);
	}
Esempio n. 9
0
// Guess the alphabet of the sequences in this vector.
// The first CHAR_COUNT non-gap letters (taken across sequences in
// order) are examined. If at least MIN_NUCLEO_PCT percent of them
// satisfy IsDNA the guess is DNA; otherwise, if at least
// MIN_NUCLEO_PCT percent satisfy IsRNA the guess is RNA; otherwise
// amino. An empty vector, or one with no non-gap letters, yields
// amino.
ALPHA SeqVect::GuessAlpha() const
	{
	const unsigned CHAR_COUNT = 100;
	const unsigned MIN_NUCLEO_PCT = 95;

	const unsigned uSeqCount = GetSeqCount();
	if (0 == uSeqCount)
		return ALPHA_Amino;

	unsigned uCurSeq = 0;
	unsigned uCurPos = 0;
	unsigned uCurLength = GetSeqLength(0);
	const Seq *ptrCurSeq = &GetSeq(0);

	unsigned uDNAHits = 0;
	unsigned uRNAHits = 0;
	unsigned uSampled = 0;
	while (uSampled < CHAR_COUNT)
		{
		if (uCurPos >= uCurLength)
			{
		// Current sequence exhausted: move on to the next one,
		// or stop when there are no more sequences.
			++uCurSeq;
			if (uCurSeq >= uSeqCount)
				break;
			ptrCurSeq = &GetSeq(uCurSeq);
			uCurLength = ptrCurSeq->Length();
			uCurPos = 0;
			continue;
			}

		const char c = ptrCurSeq->at(uCurPos);
		++uCurPos;
		if (IsGapChar(c))
			continue;

	// A letter may count for both alphabets.
		if (IsDNA(c))
			++uDNAHits;
		if (IsRNA(c))
			++uRNAHits;
		++uSampled;
		}

	// Nothing sampled: no basis for a nucleotide guess.
	if (0 == uSampled)
		return ALPHA_Amino;
	// Integer percentages; DNA is tested before RNA.
	if ((uDNAHits*100)/uSampled >= MIN_NUCLEO_PCT)
		return ALPHA_DNA;
	if ((uRNAHits*100)/uSampled >= MIN_NUCLEO_PCT)
		return ALPHA_RNA;
	return ALPHA_Amino;
	}
Esempio n. 10
0
// Read the next FASTA record from File into this sequence.
//
// The sequence is cleared first. The text following '>' on the
// annotation line becomes the name. Residue lines are then read up
// to end-of-file or the next line starting with '>'; white space
// and gap characters are dropped, printable characters that are
// not residues are replaced by the wildcard character (with a
// warning), non-printable bytes are reported through Quit, and
// every stored character is converted to upper case.
//
// Returns true if end-of-file was hit before any record could be
// read, false if a record was read (even when end-of-file
// terminated that record).
bool Seq::FromFASTAFile(TextFile &File)
	{
	Clear();

	char szLine[MAX_FASTA_LINE];
	bool bEof = File.GetLine(szLine, sizeof(szLine));
	if (bEof)
		return true;
	if ('>' != szLine[0])
		Quit("Expecting '>' in FASTA file %s line %u",
		  File.GetFileName(), File.GetLineNr());

	size_t n = strlen(szLine);
	if (1 == n)
		Quit("Missing annotation following '>' in FASTA file %s line %u",
		  File.GetFileName(), File.GetLineNr());

	// szLine + 1 holds n - 1 characters, so n bytes are exactly
	// enough for the name plus its terminating NUL.
	m_ptrName = new char[n];
	strcpy(m_ptrName, szLine + 1);

	// Pos always marks the start of the line about to be read, so
	// that the header of the next record can be pushed back.
	TEXTFILEPOS Pos = File.GetPos();
	for (;;)
		{
		bEof = File.GetLine(szLine, sizeof(szLine));
		if (bEof)
			{
			if (0 == size())
				{
				Quit("Empty sequence in FASTA file %s line %u",
				  File.GetFileName(), File.GetLineNr());
			// Presumably not reached (Quit is expected not to
			// return); kept as a fallback.
				return true;
				}
			return false;
			}
		if ('>' == szLine[0])
			{
			if (0 == size())
				Quit("Empty sequence in FASTA file %s line %u",
				  File.GetFileName(), File.GetLineNr());
		// Rewind to beginning of this line, it's the start of the
		// next sequence.
			File.SetPos(Pos);
			return false;
			}
		const char *ptrChar = szLine;
		while (char c = *ptrChar++)
			{
		// The <ctype.h> classifiers require a value representable
		// as unsigned char; file bytes >= 0x80 would be negative
		// in a signed plain char, which is undefined behaviour.
			const unsigned char uc = (unsigned char) c;
			if (isspace(uc))
				continue;
			if (IsGapChar(c))
				continue;
			if (!IsResidueChar(c))
				{
				if (isprint(uc))
					{
					char w = GetWildcardChar();
					Warning("Invalid residue '%c' in FASTA file %s line %d, replaced by '%c'",
					  c, File.GetFileName(), File.GetLineNr(), w);
					c = w;
					}
				else
					Quit("Invalid byte hex %02x in FASTA file %s line %d",
					  (unsigned char) c, File.GetFileName(), File.GetLineNr());
				}
			c = (char) toupper((unsigned char) c);
			push_back(c);
			}
		Pos = File.GetPos();
		}
	}
Esempio n. 11
0
bool Seq::EqIgnoreCaseAndGaps(const Seq &s) const
	{
	const unsigned uThisLength = Length();
	const unsigned uOtherLength = s.Length();
	
	unsigned uThisPos = 0;
	unsigned uOtherPos = 0;

	int cThis;
	int cOther;
	for (;;)
		{
		if (uThisPos == uThisLength && uOtherPos == uOtherLength)
			break;

	// Set cThis to next non-gap character in this string
	// or -1 if end-of-string.
		for (;;)
			{
			if (uThisPos == uThisLength)
				{
				cThis = -1;
				break;
				}
			else
				{
				cThis = at(uThisPos);
				++uThisPos;
				if (!IsGapChar(cThis))
					{
					cThis = toupper(cThis);
					break;
					}
				}
			}

	// Set cOther to next non-gap character in s
	// or -1 if end-of-string.
		for (;;)
			{
			if (uOtherPos == uOtherLength)
				{
				cOther = -1;
				break;
				}
			else
				{
				cOther = s.at(uOtherPos);
				++uOtherPos;
				if (!IsGapChar(cOther))
					{
					cOther = toupper(cOther);
					break;
					}
				}
			}

	// Compare characters are corresponding ungapped position
		if (cThis != cOther)
			return false;
		}
	return true;
	}